{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:JUP2Q7U5OJMPVWDHDC4JXKLRS5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c666d70bf85b4a5989194f51c6434f3e73d3397e9c1081c732e66dc8f0811ec8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","title_canon_sha256":"597cec621d6cb1f3586f0bdb55926f5783a602101251a516d1835619aa5bbd88"},"schema_version":"1.0","source":{"id":"2505.11831","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.11831","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2505.11831v2","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.11831","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"JUP2Q7U5OJMP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"JUP2Q7U5OJMPVWDH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"JUP2Q7U5","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:e1fc03e63086257a04af330303e426cb56cf6588ff541e2ef6ab7f8c64076647","target":"graph","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ARC-AGI-2 incorporates a newly curated and expanded set of tasks specifically designed to provide a more granular signal to assess abstract reasoning and problem-solving abilities at higher levels of fluid intelligence."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The newly selected tasks genuinely require higher levels of fluid intelligence with only minimal prior knowledge, and the human testing protocol produces a reliable and representative baseline."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ARC-AGI-2 adds a larger, more complex set of tasks to the original ARC-AGI benchmark to give finer-grained measurement of fluid intelligence in AI."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems."}],"snapshot_sha256":"a67d66da04baded035b7fa20132dcde441bbfb28ea7a45f96eb6681492283edd"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"54f0f11ce90d1acbf158e729524215e13365c12af772479d07a4466ff1fc4f06"},"paper":{"abstract_excerpt":"The Abstraction and Reasoning Corpus for Artificial General Intelligence (ARC-AGI), introduced in 2019, established a challenging benchmark for evaluating the general fluid intelligence of artificial systems via a set of unique, novel tasks only requiring minimal prior knowledge. While ARC-AGI has spurred significant research activity over the past five years, recent AI progress calls for benchmarks capable of finer-grained evaluation at higher levels of cognitive complexity. We introduce ARC-AGI-2, an upgraded version of the benchmark. ARC-AGI-2 preserves the input-output pair task format of ","authors_text":"Bryan Landers, Francois Chollet, Gregory Kamradt, Henry Pinkard, Mike Knoop","cross_cats":[],"headline":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","title":"ARC-AGI-2: A New Challenge for Frontier AI Reasoning Systems"},"references":{"count":13,"internal_anchors":0,"resolved_work":13,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"ARC Prize - Leaderboard.https://arcprize.org/leaderboard","work_id":"554fed08-e4ae-4231-bd02-0429d9a3a2dd","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"ARC Prize - Policy.https://arcprize.org/policy","work_id":"6db1d41d-6cb8-4ebf-83ed-b6446c01cb39","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Kaggle competition","work_id":"f2b15558-bb48-42b5-8df2-e1a2b24d0dc0","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Lab42 competi- tion","work_id":"ff6dbefe-0344-49fd-993a-56618353547c","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Lab42 competi- tion","work_id":"612eb557-b344-4e61-ba1a-a7cfb956ca8e","year":2023}],"snapshot_sha256":"26c8c782e89c6f0fab091170754f0008a278e39827819865d1f325adefa0e17a"},"source":{"id":"2505.11831","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T16:45:48.667064Z","id":"51689693-5cd4-4e9c-9783-4e24d71b017c","model_set":{"reader":"grok-4.3"},"one_line_summary":"ARC-AGI-2 adds a larger, more complex set of tasks to the original ARC-AGI benchmark to give finer-grained measurement of fluid intelligence in AI.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems.","strongest_claim":"ARC-AGI-2 incorporates a newly curated and expanded set of tasks specifically designed to provide a more granular signal to assess abstract reasoning and problem-solving abilities at higher levels of fluid intelligence.","weakest_assumption":"The newly selected tasks genuinely require higher levels of fluid intelligence with only minimal prior knowledge, and the human testing protocol produces a reliable and representative baseline."}},"verdict_id":"51689693-5cd4-4e9c-9783-4e24d71b017c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:052ceb6723d4d9fba484b61dbe4aaaa281fbf85e9e5b77369ef7df1dd290d734","target":"record","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c666d70bf85b4a5989194f51c6434f3e73d3397e9c1081c732e66dc8f0811ec8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","title_canon_sha256":"597cec621d6cb1f3586f0bdb55926f5783a602101251a516d1835619aa5bbd88"},"schema_version":"1.0","source":{"id":"2505.11831","kind":"arxiv","version":2}},"canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","first_computed_at":"2026-05-17T23:38:50.833466Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:50.833466Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Xz6mDPujL5SHXXWAz7DKkOMGR4OBN4TBb9Dx3im1aM2iIJ0cAMYNgmH4Uu3LxXIPZXciUVGTaK1FN8i7j7YaBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:50.833892Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.11831","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:052ceb6723d4d9fba484b61dbe4aaaa281fbf85e9e5b77369ef7df1dd290d734","sha256:e1fc03e63086257a04af330303e426cb56cf6588ff541e2ef6ab7f8c64076647"],"state_sha256":"f6a9511c1bbe7fbebc4bdda1de2039974d8ccd1de791389985b8e14be3cff8fb"}