{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:EA7TJTSPSECLRNWSJ3MCH4IBAI","short_pith_number":"pith:EA7TJTSP","canonical_record":{"source":{"id":"2602.16763","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-18T16:51:37Z","cross_cats_sorted":[],"title_canon_sha256":"740e7cacf3dc24eff992402efe177492f43c30bcabe7e265a6174d4f2a289962","abstract_canon_sha256":"5a848594b9968ec5574e9dd94ef438073f2293a2436a012fc800f8c889a32869"},"schema_version":"1.0"},"canonical_sha256":"203f34ce4f9104b8b6d24ed823f1010234c7ede8d7c26eec25899b067153ebe4","source":{"kind":"arxiv","id":"2602.16763","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.16763","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"arxiv_version","alias_value":"2602.16763v2","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.16763","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"pith_short_12","alias_value":"EA7TJTSPSECL","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"pith_short_16","alias_value":"EA7TJTSPSECLRNWS","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"pith_short_8","alias_value":"EA7TJTSP","created_at":"2026-06-02T01:03:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:EA7TJTSPSECLRNWSJ3MCH4IBAI","target":"record","payload":{"canonical_record":{"source":{"id":"2602.16763","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-18T16:51:37Z","cross_cats_sorted":[],"title_canon_sha256":"740e7cacf3dc24eff992402efe177492f43c30bcabe7e265a6174d4f2a289962","abstract_canon_sha256":"5a848594b9968ec5574e9dd94ef438073f2293a2436a012fc800f8c889a32869"},"schema_version":"1.0"},"canonical_sha256":"203f34ce4f9104b8b6d24ed823f1010234c7ede8d7c26eec25899b067153ebe4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:43.761980Z","signature_b64":"d+BWA8w0ByJ1DWQ384F4W8penQGv0QOcTO1iNJRKaHpsH+buhjL9EZhpTK5/H2frN49lzSwtgrtd7NLjWD53Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"203f34ce4f9104b8b6d24ed823f1010234c7ede8d7c26eec25899b067153ebe4","last_reissued_at":"2026-06-02T01:03:43.761414Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:43.761414Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.16763","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T01:03:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"meCpCoG1AaO4ogOW1L+/pchVqPkya9UdkLuiqDxBqOpPwxwU6eKbQfsAGv2BoiOOcHOtupJuo2FTsYelyp3PAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T22:20:26.875599Z"},"content_sha256":"d9f571ecf5b999f659bba0aa9ffee8847aebebadbf813c588945dd8609dd4ffe","schema_version":"1.0","event_id":"sha256:d9f571ecf5b999f659bba0aa9ffee8847aebebadbf813c588945dd8609dd4ffe"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:EA7TJTSPSECLRNWSJ3MCH4IBAI","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"When AI Benchmarks Plateau: A Systematic Study of Benchmark Saturation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anka Reuel, Arjun Subramonian, Avijit Ghosh, Chenxi Whitehouse, Christina Knight, Dayeon Ki, Eliya Habba, Hossein A. Rahmani, Irene Solaiman, Jan Batzner, Jeba Sania, Jennifer Mickel, Jenny Chim, Jingwei Ni, Jyoutir Raj, Leshem Choshen, Marek \\v{S}uppa, Mrinmaya Sachan, Mubashara Akhtar, Mykel Kochenderfer, Pawan Sasanka Ammanamanchi, Prajna Soni, Robert Scholz, Ruchit Rawal, Sanchit Ahuja, Sanmi Koyejo, Shubham Singh, Siddhesh Pawar, Srishti Yadav, Stella Biderman, Subramanyam Sahoo, Usman Gohar, Vil\\'em Zouhar, Yanan Long, Yiyang Nan, Yu Fan, Zeerak Talat","submitted_at":"2026-02-18T16:51:37Z","abstract_excerpt":"Artificial intelligence benchmarks are an important mechanism for measuring model progress and guiding deployment decisions. However, benchmarks quickly \"saturate\", making it difficult to differentiate models and diminishing their long-term value. In this study, we define benchmark saturation and analyze it across 60 language model benchmarks using 14 properties that relate to saturation. We find that nearly half of the our benchmarks exhibit saturation, with rates increasing with age. Further, we find that resilience to saturation is impacted by expert-curation, not by public test data. Our r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.16763","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.16763/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T01:03:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0pzokXUgr1+J/Tqe9JHOEEHmjQNOH8gf2XIjEf3BjQnwaptx6Xjs5d8HOSd3DDr4lQunoQAmEJuX+X0zkKm6Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T22:20:26.875987Z"},"content_sha256":"a37018dcaedfde8bf0815943611e9776d871cafd5b6273268398b91e57d927fc","schema_version":"1.0","event_id":"sha256:a37018dcaedfde8bf0815943611e9776d871cafd5b6273268398b91e57d927fc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI/bundle.json","state_url":"https://pith.science/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T22:20:26Z","links":{"resolver":"https://pith.science/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI","bundle":"https://pith.science/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI/bundle.json","state":"https://pith.science/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EA7TJTSPSECLRNWSJ3MCH4IBAI/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EA7TJTSPSECLRNWSJ3MCH4IBAI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5a848594b9968ec5574e9dd94ef438073f2293a2436a012fc800f8c889a32869","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-18T16:51:37Z","title_canon_sha256":"740e7cacf3dc24eff992402efe177492f43c30bcabe7e265a6174d4f2a289962"},"schema_version":"1.0","source":{"id":"2602.16763","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.16763","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"arxiv_version","alias_value":"2602.16763v2","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.16763","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"pith_short_12","alias_value":"EA7TJTSPSECL","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"pith_short_16","alias_value":"EA7TJTSPSECLRNWS","created_at":"2026-06-02T01:03:43Z"},{"alias_kind":"pith_short_8","alias_value":"EA7TJTSP","created_at":"2026-06-02T01:03:43Z"}],"graph_snapshots":[{"event_id":"sha256:a37018dcaedfde8bf0815943611e9776d871cafd5b6273268398b91e57d927fc","target":"graph","created_at":"2026-06-02T01:03:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.16763/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Artificial intelligence benchmarks are an important mechanism for measuring model progress and guiding deployment decisions. However, benchmarks quickly \"saturate\", making it difficult to differentiate models and diminishing their long-term value. In this study, we define benchmark saturation and analyze it across 60 language model benchmarks using 14 properties that relate to saturation. We find that nearly half of the our benchmarks exhibit saturation, with rates increasing with age. Further, we find that resilience to saturation is impacted by expert-curation, not by public test data. Our r","authors_text":"Anka Reuel, Arjun Subramonian, Avijit Ghosh, Chenxi Whitehouse, Christina Knight, Dayeon Ki, Eliya Habba, Hossein A. Rahmani, Irene Solaiman, Jan Batzner, Jeba Sania, Jennifer Mickel, Jenny Chim, Jingwei Ni, Jyoutir Raj, Leshem Choshen, Marek \\v{S}uppa, Mrinmaya Sachan, Mubashara Akhtar, Mykel Kochenderfer, Pawan Sasanka Ammanamanchi, Prajna Soni, Robert Scholz, Ruchit Rawal, Sanchit Ahuja, Sanmi Koyejo, Shubham Singh, Siddhesh Pawar, Srishti Yadav, Stella Biderman, Subramanyam Sahoo, Usman Gohar, Vil\\'em Zouhar, Yanan Long, Yiyang Nan, Yu Fan, Zeerak Talat","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-18T16:51:37Z","title":"When AI Benchmarks Plateau: A Systematic Study of Benchmark Saturation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.16763","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d9f571ecf5b999f659bba0aa9ffee8847aebebadbf813c588945dd8609dd4ffe","target":"record","created_at":"2026-06-02T01:03:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5a848594b9968ec5574e9dd94ef438073f2293a2436a012fc800f8c889a32869","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-18T16:51:37Z","title_canon_sha256":"740e7cacf3dc24eff992402efe177492f43c30bcabe7e265a6174d4f2a289962"},"schema_version":"1.0","source":{"id":"2602.16763","kind":"arxiv","version":2}},"canonical_sha256":"203f34ce4f9104b8b6d24ed823f1010234c7ede8d7c26eec25899b067153ebe4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"203f34ce4f9104b8b6d24ed823f1010234c7ede8d7c26eec25899b067153ebe4","first_computed_at":"2026-06-02T01:03:43.761414Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T01:03:43.761414Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"d+BWA8w0ByJ1DWQ384F4W8penQGv0QOcTO1iNJRKaHpsH+buhjL9EZhpTK5/H2frN49lzSwtgrtd7NLjWD53Aw==","signature_status":"signed_v1","signed_at":"2026-06-02T01:03:43.761980Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.16763","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d9f571ecf5b999f659bba0aa9ffee8847aebebadbf813c588945dd8609dd4ffe","sha256:a37018dcaedfde8bf0815943611e9776d871cafd5b6273268398b91e57d927fc"],"state_sha256":"5fc6fd0ce536a6d38cc6f5d774a75c99e68506ab4121ad473a4fb91d5e570090"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1MlTmxioEWYla/ZSUbNaNqTMwUDej3D8KvOw9KfV1y7pcoVjhPgGZdTGxD7hMBHY770v1nAF+np8A906pEXfAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T22:20:26.878110Z","bundle_sha256":"788c2e94eac1e3073d0703fc4b30a60ee5305cc82d152dea5b51e06cc6ec95a0"}}