{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:HBKPP6JMGIJSASGXSLPFTMIGMA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"dea87f0c846c783c42649fb5072adb8481e34cbb7d74e3b123b4521e645ff46a","cross_cats_sorted":["cs.CY","stat.AP"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-24T21:59:08Z","title_canon_sha256":"e17ed357778eca1f4c18f6beb0c6d42f4cccb3b045fcfcaaad1629452585e1eb"},"schema_version":"1.0","source":{"id":"2605.25272","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.25272","created_at":"2026-05-26T02:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"2605.25272v1","created_at":"2026-05-26T02:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.25272","created_at":"2026-05-26T02:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"HBKPP6JMGIJS","created_at":"2026-05-26T02:04:26Z"},{"alias_kind":"pith_short_16","alias_value":"HBKPP6JMGIJSASGX","created_at":"2026-05-26T02:04:26Z"},{"alias_kind":"pith_short_8","alias_value":"HBKPP6JM","created_at":"2026-05-26T02:04:26Z"}],"graph_snapshots":[{"event_id":"sha256:0a5e86fc08f707c254feb5daf3567b98ff65e20758040fd3313fc38c22c9d43a","target":"graph","created_at":"2026-05-26T02:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.25272/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While aggregate leaderboard scores drive AI development, they contain substantial measurement noise whose sources and magnitudes remain unquantified, making it unclear when rankings reflect genuine capability differences versus evaluation artifacts. We introduce a framework for measuring the latent landscape in AI benchmark ecosystems. Applying Confirmatory Factor Analysis (CFA) and Generalizability Theory to 4,000+ models from the Open LLM Leaderboard, we decompose sources of ranking variance and establish: (1) structures assumed in current reporting practice underestimate the strength of rel","authors_text":"Anka Reuel, Benjamin Domingue, Hansol Lee, Jodi M. Casabianca, Lijin Zhang, Michael Hardy, Sang Truong, Sanmi Koyejo, Yash Dave","cross_cats":["cs.CY","stat.AP"],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-24T21:59:08Z","title":"AI Cartography: Mapping the Latent Landscape of AI Benchmark Ecosystems"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.25272","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:68f28e861efbecc6352f0c5876d5946d1834553ce792f9725c1f032450a9fa33","target":"record","created_at":"2026-05-26T02:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"dea87f0c846c783c42649fb5072adb8481e34cbb7d74e3b123b4521e645ff46a","cross_cats_sorted":["cs.CY","stat.AP"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-24T21:59:08Z","title_canon_sha256":"e17ed357778eca1f4c18f6beb0c6d42f4cccb3b045fcfcaaad1629452585e1eb"},"schema_version":"1.0","source":{"id":"2605.25272","kind":"arxiv","version":1}},"canonical_sha256":"3854f7f92c32132048d792de59b1066007e5ee2e1580c41ee511a12db1a53196","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3854f7f92c32132048d792de59b1066007e5ee2e1580c41ee511a12db1a53196","first_computed_at":"2026-05-26T02:04:26.706079Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:04:26.706079Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yEmEISothSfOKUyGZgEJjrXHtzIiDsnVye3Bypqol3xsoAIWvlLezLgy4mSI2Q7IW6LTfAdFmztcP5OuIa97Cg==","signature_status":"signed_v1","signed_at":"2026-05-26T02:04:26.706640Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.25272","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:68f28e861efbecc6352f0c5876d5946d1834553ce792f9725c1f032450a9fa33","sha256:0a5e86fc08f707c254feb5daf3567b98ff65e20758040fd3313fc38c22c9d43a"],"state_sha256":"cff98a52ee63ce9c3ab643bd72bedd589558284203ba1bd66d3bd51b65eb4bf4"}