{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:LAMQRSQMP6D7WJSHDMY7D6RD5O","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"15b96d2206b7385c6251113cf6382dd4dfd673d0492d321994064f46348f4c80","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","title_canon_sha256":"e39b5e54a321b6dd2a2dcd2586cbb61b3fd68e79c2758b8eeaa45692171d911f"},"schema_version":"1.0","source":{"id":"2411.04872","kind":"arxiv","version":7}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2411.04872","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2411.04872v7","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2411.04872","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"LAMQRSQMP6D7","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LAMQRSQMP6D7WJSH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LAMQRSQM","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:6896da89cc00d2a75a559296471913a9fc0f685463db802c35dae4abade4686e","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Current state-of-the-art AI models solve under 2% of problems, revealing a vast gap between AI capabilities and the prowess of the mathematical community."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The problems are genuinely original and unpublished with no data contamination risk, and automated verification reliably measures true mathematical reasoning ability."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"FrontierMath is a new benchmark of hundreds of original hard math problems that current AI models solve less than 2% of."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems."}],"snapshot_sha256":"d1533291bb17f4fcdd10470af2ef410e3853ceefb5e3d0db3efc952c906a1845"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce FrontierMath, a benchmark of hundreds of original, exceptionally challenging mathematics problems crafted and vetted by expert mathematicians. The questions cover most major branches of modern mathematics -- from computationally intensive problems in number theory and real analysis to abstract questions in algebraic geometry and category theory. Solving a typical problem requires multiple hours of effort from a researcher in the relevant branch of mathematics, and for the upper end questions, multiple days. FrontierMath uses new, unpublished problems and automated verification to ","authors_text":"Alex Gunning, Anson Ho, Bogdan Grechuk, Caroline Falkman Olsson, Diego Chicharro, Ege Erdil, Elizabeth Pratt, Elliot Glazer, Emily de Oliveira Santos, Evan Chen, Grant Barkley, Jaime Sevilla, Jean-Stanislas Denain, Lionel Levine, Mark Wildon, Matej Vrzala, Matthew Barnett, Natalie Stewart, Olli J\\\"arviniemi, Qiuyu Ren, Robert Sandler, Shreepranav Varma Enugandla, Tamay Besiroglu, Tetiana Grechuk","cross_cats":[],"headline":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","title":"FrontierMath: A Benchmark for Evaluating Advanced Mathematical Reasoning in AI"},"references":{"count":32,"internal_anchors":2,"resolved_work":32,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"MSC2020 Mathematics Subject Classification System , author =","work_id":"5e2dc496-fa18-47d5-a4ac-19f19351c418","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Training verifiers to solve math word problems, 2021 , author =","work_id":"877a76df-63bd-4ed4-af21-d24219973188","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Advances in neural information processing systems , volume=","work_id":"d428ff68-61a7-4024-83b4-d1b47a2ad468","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Measuring mathematical problem solving with the math dataset , author =","work_id":"743e28cf-6d1f-4301-a1ce-15d28821dd87","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Math Olympiad Hardness Scale (MOHS) , author =","work_id":"b6a79b50-e6c0-412d-b3e0-21e76a92e058","year":null}],"snapshot_sha256":"e78a26e92f3862c6e21b005e21fbe7111a5555f6855b742f0ac72d69ddaaadc9"},"source":{"id":"2411.04872","kind":"arxiv","version":7},"verdict":{"created_at":"2026-05-17T00:37:28.338908Z","id":"04ce5a15-2b12-4a1a-bc37-462ab30495b4","model_set":{"reader":"grok-4.3"},"one_line_summary":"FrontierMath is a new benchmark of hundreds of original hard math problems that current AI models solve less than 2% of.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems.","strongest_claim":"Current state-of-the-art AI models solve under 2% of problems, revealing a vast gap between AI capabilities and the prowess of the mathematical community.","weakest_assumption":"The problems are genuinely original and unpublished with no data contamination risk, and automated verification reliably measures true mathematical reasoning ability."}},"verdict_id":"04ce5a15-2b12-4a1a-bc37-462ab30495b4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1f6c700a0a25ba26f45fafa3856709fe046048d9bc84278abe3cd04ec5c20614","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"15b96d2206b7385c6251113cf6382dd4dfd673d0492d321994064f46348f4c80","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","title_canon_sha256":"e39b5e54a321b6dd2a2dcd2586cbb61b3fd68e79c2758b8eeaa45692171d911f"},"schema_version":"1.0","source":{"id":"2411.04872","kind":"arxiv","version":7}},"canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","first_computed_at":"2026-05-17T23:38:46.078189Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.078189Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OcYZqz0Zp63FbZvmrI5ZapnieqCfz6D3dG/R6AyMxLeK+Y5JzoS2+yKTnx8/fLdolDnn1I3a7ZfUBilJuH2RBg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.078731Z","signed_message":"canonical_sha256_bytes"},"source_id":"2411.04872","source_kind":"arxiv","source_version":7}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1f6c700a0a25ba26f45fafa3856709fe046048d9bc84278abe3cd04ec5c20614","sha256:6896da89cc00d2a75a559296471913a9fc0f685463db802c35dae4abade4686e"],"state_sha256":"3e5bcb7153abb3f40fc087fd317a602403b11676df83aaf07dd47638a69b7924"}