{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:PO4JOGGP4OERALIKHHES464WPZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6a301b694f74fa533aede1a374d7c93b62e9c2eef320a3ba771f538a92fea34a","cross_cats_sorted":["stat.ME","stat.TH"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.ST","submitted_at":"2020-12-05T09:29:21Z","title_canon_sha256":"bac451c158501c34eca295c7a8b749eead3efb752ea09920592ff5f134ad7769"},"schema_version":"1.0","source":{"id":"2012.02985","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2012.02985","created_at":"2026-05-28T02:04:38Z"},{"alias_kind":"arxiv_version","alias_value":"2012.02985v5","created_at":"2026-05-28T02:04:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2012.02985","created_at":"2026-05-28T02:04:38Z"},{"alias_kind":"pith_short_12","alias_value":"PO4JOGGP4OER","created_at":"2026-05-28T02:04:38Z"},{"alias_kind":"pith_short_16","alias_value":"PO4JOGGP4OERALIK","created_at":"2026-05-28T02:04:38Z"},{"alias_kind":"pith_short_8","alias_value":"PO4JOGGP","created_at":"2026-05-28T02:04:38Z"}],"graph_snapshots":[{"event_id":"sha256:1e2a8832707fd3b6227828fe91c6e26148650396574c6fc429103ec555e2a67a","target":"graph","created_at":"2026-05-28T02:04:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2012.02985/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Principal component analysis (PCA) is a foundational tool in modern data analysis, and a crucial step in PCA is selecting the number of components to keep. However, classical selection methods (e.g., scree plots, parallel analysis, etc.) lack statistical guarantees in the increasingly common setting of large-dimensional data with heterogeneous noise, i.e., where each entry may have a different noise variance. Moreover, it turns out that these methods, which are highly effective for homogeneous noise, can fail dramatically for data with heterogeneous noise. This paper proposes a new method call","authors_text":"David Hong, Edgar Dobriban, Yue Sheng","cross_cats":["stat.ME","stat.TH"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.ST","submitted_at":"2020-12-05T09:29:21Z","title":"Selecting the number of components in PCA via random signflips"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2012.02985","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:41a068d53c47c4ee245b66be970c20b9be713f42c264e34025122da61be1524e","target":"record","created_at":"2026-05-28T02:04:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6a301b694f74fa533aede1a374d7c93b62e9c2eef320a3ba771f538a92fea34a","cross_cats_sorted":["stat.ME","stat.TH"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.ST","submitted_at":"2020-12-05T09:29:21Z","title_canon_sha256":"bac451c158501c34eca295c7a8b749eead3efb752ea09920592ff5f134ad7769"},"schema_version":"1.0","source":{"id":"2012.02985","kind":"arxiv","version":5}},"canonical_sha256":"7bb89718cfe389102d0a39c92e7b967e7ec41f5b5d071c55fb56b2415b404335","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7bb89718cfe389102d0a39c92e7b967e7ec41f5b5d071c55fb56b2415b404335","first_computed_at":"2026-05-28T02:04:38.282395Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T02:04:38.282395Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lImTerzShZPzO99rH+S/VgdJDQ3MX4/7hJj8fV6o2CmFtdQ40kukTcuooaLRKFvzla7kdjB4UB24Mg/YtmJkDg==","signature_status":"signed_v1","signed_at":"2026-05-28T02:04:38.282943Z","signed_message":"canonical_sha256_bytes"},"source_id":"2012.02985","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:41a068d53c47c4ee245b66be970c20b9be713f42c264e34025122da61be1524e","sha256:1e2a8832707fd3b6227828fe91c6e26148650396574c6fc429103ec555e2a67a"],"state_sha256":"d8569e8919bd7de362ae4a5a9d7febc14577a9a1fd2c5c1d2b6608d0a699891b"}