{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2020:PO4JOGGP4OERALIKHHES464WPZ","short_pith_number":"pith:PO4JOGGP","schema_version":"1.0","canonical_sha256":"7bb89718cfe389102d0a39c92e7b967e7ec41f5b5d071c55fb56b2415b404335","source":{"kind":"arxiv","id":"2012.02985","version":5},"attestation_state":"computed","paper":{"title":"Selecting the number of components in PCA via random signflips","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ME","stat.TH"],"primary_cat":"math.ST","authors_text":"David Hong, Edgar Dobriban, Yue Sheng","submitted_at":"2020-12-05T09:29:21Z","abstract_excerpt":"Principal component analysis (PCA) is a foundational tool in modern data analysis, and a crucial step in PCA is selecting the number of components to keep. However, classical selection methods (e.g., scree plots, parallel analysis, etc.) lack statistical guarantees in the increasingly common setting of large-dimensional data with heterogeneous noise, i.e., where each entry may have a different noise variance. Moreover, it turns out that these methods, which are highly effective for homogeneous noise, can fail dramatically for data with heterogeneous noise. This paper proposes a new method call"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2012.02985","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.ST","submitted_at":"2020-12-05T09:29:21Z","cross_cats_sorted":["stat.ME","stat.TH"],"title_canon_sha256":"bac451c158501c34eca295c7a8b749eead3efb752ea09920592ff5f134ad7769","abstract_canon_sha256":"6a301b694f74fa533aede1a374d7c93b62e9c2eef320a3ba771f538a92fea34a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T02:04:38.282943Z","signature_b64":"lImTerzShZPzO99rH+S/VgdJDQ3MX4/7hJj8fV6o2CmFtdQ40kukTcuooaLRKFvzla7kdjB4UB24Mg/YtmJkDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7bb89718cfe389102d0a39c92e7b967e7ec41f5b5d071c55fb56b2415b404335","last_reissued_at":"2026-05-28T02:04:38.282395Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T02:04:38.282395Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Selecting the number of components in PCA via random signflips","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ME","stat.TH"],"primary_cat":"math.ST","authors_text":"David Hong, Edgar Dobriban, Yue Sheng","submitted_at":"2020-12-05T09:29:21Z","abstract_excerpt":"Principal component analysis (PCA) is a foundational tool in modern data analysis, and a crucial step in PCA is selecting the number of components to keep. However, classical selection methods (e.g., scree plots, parallel analysis, etc.) lack statistical guarantees in the increasingly common setting of large-dimensional data with heterogeneous noise, i.e., where each entry may have a different noise variance. Moreover, it turns out that these methods, which are highly effective for homogeneous noise, can fail dramatically for data with heterogeneous noise. This paper proposes a new method call"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2012.02985","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2012.02985/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2012.02985","created_at":"2026-05-28T02:04:38.282467+00:00"},{"alias_kind":"arxiv_version","alias_value":"2012.02985v5","created_at":"2026-05-28T02:04:38.282467+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2012.02985","created_at":"2026-05-28T02:04:38.282467+00:00"},{"alias_kind":"pith_short_12","alias_value":"PO4JOGGP4OER","created_at":"2026-05-28T02:04:38.282467+00:00"},{"alias_kind":"pith_short_16","alias_value":"PO4JOGGP4OERALIK","created_at":"2026-05-28T02:04:38.282467+00:00"},{"alias_kind":"pith_short_8","alias_value":"PO4JOGGP","created_at":"2026-05-28T02:04:38.282467+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ","json":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ.json","graph_json":"https://pith.science/api/pith-number/PO4JOGGP4OERALIKHHES464WPZ/graph.json","events_json":"https://pith.science/api/pith-number/PO4JOGGP4OERALIKHHES464WPZ/events.json","paper":"https://pith.science/paper/PO4JOGGP"},"agent_actions":{"view_html":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ","download_json":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ.json","view_paper":"https://pith.science/paper/PO4JOGGP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2012.02985&json=true","fetch_graph":"https://pith.science/api/pith-number/PO4JOGGP4OERALIKHHES464WPZ/graph.json","fetch_events":"https://pith.science/api/pith-number/PO4JOGGP4OERALIKHHES464WPZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ/action/storage_attestation","attest_author":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ/action/author_attestation","sign_citation":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ/action/citation_signature","submit_replication":"https://pith.science/pith/PO4JOGGP4OERALIKHHES464WPZ/action/replication_record"}},"created_at":"2026-05-28T02:04:38.282467+00:00","updated_at":"2026-05-28T02:04:38.282467+00:00"}