{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:LAMQRSQMP6D7WJSHDMY7D6RD5O","short_pith_number":"pith:LAMQRSQM","canonical_record":{"source":{"id":"2411.04872","kind":"arxiv","version":7},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","cross_cats_sorted":[],"title_canon_sha256":"e39b5e54a321b6dd2a2dcd2586cbb61b3fd68e79c2758b8eeaa45692171d911f","abstract_canon_sha256":"15b96d2206b7385c6251113cf6382dd4dfd673d0492d321994064f46348f4c80"},"schema_version":"1.0"},"canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","source":{"kind":"arxiv","id":"2411.04872","version":7},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2411.04872","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2411.04872v7","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2411.04872","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"LAMQRSQMP6D7","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LAMQRSQMP6D7WJSH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LAMQRSQM","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:LAMQRSQMP6D7WJSHDMY7D6RD5O","target":"record","payload":{"canonical_record":{"source":{"id":"2411.04872","kind":"arxiv","version":7},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","cross_cats_sorted":[],"title_canon_sha256":"e39b5e54a321b6dd2a2dcd2586cbb61b3fd68e79c2758b8eeaa45692171d911f","abstract_canon_sha256":"15b96d2206b7385c6251113cf6382dd4dfd673d0492d321994064f46348f4c80"},"schema_version":"1.0"},"canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.078731Z","signature_b64":"OcYZqz0Zp63FbZvmrI5ZapnieqCfz6D3dG/R6AyMxLeK+Y5JzoS2+yKTnx8/fLdolDnn1I3a7ZfUBilJuH2RBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","last_reissued_at":"2026-05-17T23:38:46.078189Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.078189Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2411.04872","source_version":7,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iYjG0CQdFJvHhLKuoWuOYZSjxLMBTVLuOp+cwa+sG4icI7YRD/E+MFTqMjYa9tXQR99u+QqeLJC+zxXHSpPIBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T02:53:04.018990Z"},"content_sha256":"1f6c700a0a25ba26f45fafa3856709fe046048d9bc84278abe3cd04ec5c20614","schema_version":"1.0","event_id":"sha256:1f6c700a0a25ba26f45fafa3856709fe046048d9bc84278abe3cd04ec5c20614"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:LAMQRSQMP6D7WJSHDMY7D6RD5O","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"FrontierMath: A Benchmark for Evaluating Advanced Mathematical Reasoning in AI","license":"http://creativecommons.org/licenses/by/4.0/","headline":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Alex Gunning, Anson Ho, Bogdan Grechuk, Caroline Falkman Olsson, Diego Chicharro, Ege Erdil, Elizabeth Pratt, Elliot Glazer, Emily de Oliveira Santos, Evan Chen, Grant Barkley, Jaime Sevilla, Jean-Stanislas Denain, Lionel Levine, Mark Wildon, Matej Vrzala, Matthew Barnett, Natalie Stewart, Olli J\\\"arviniemi, Qiuyu Ren, Robert Sandler, Shreepranav Varma Enugandla, Tamay Besiroglu, Tetiana Grechuk","submitted_at":"2024-11-07T17:07:35Z","abstract_excerpt":"We introduce FrontierMath, a benchmark of hundreds of original, exceptionally challenging mathematics problems crafted and vetted by expert mathematicians. The questions cover most major branches of modern mathematics -- from computationally intensive problems in number theory and real analysis to abstract questions in algebraic geometry and category theory. Solving a typical problem requires multiple hours of effort from a researcher in the relevant branch of mathematics, and for the upper end questions, multiple days. FrontierMath uses new, unpublished problems and automated verification to "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Current state-of-the-art AI models solve under 2% of problems, revealing a vast gap between AI capabilities and the prowess of the mathematical community.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The problems are genuinely original and unpublished with no data contamination risk, and automated verification reliably measures true mathematical reasoning ability.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"FrontierMath is a new benchmark of hundreds of original hard math problems that current AI models solve less than 2% of.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d1533291bb17f4fcdd10470af2ef410e3853ceefb5e3d0db3efc952c906a1845"},"source":{"id":"2411.04872","kind":"arxiv","version":7},"verdict":{"id":"04ce5a15-2b12-4a1a-bc37-462ab30495b4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T00:37:28.338908Z","strongest_claim":"Current state-of-the-art AI models solve under 2% of problems, revealing a vast gap between AI capabilities and the prowess of the mathematical community.","one_line_summary":"FrontierMath is a new benchmark of hundreds of original hard math problems that current AI models solve less than 2% of.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The problems are genuinely original and unpublished with no data contamination risk, and automated verification reliably measures true mathematical reasoning ability.","pith_extraction_headline":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems."},"references":{"count":32,"sample":[{"doi":"","year":null,"title":"MSC2020 Mathematics Subject Classification System , author =","work_id":"5e2dc496-fa18-47d5-a4ac-19f19351c418","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Training verifiers to solve math word problems, 2021 , author =","work_id":"877a76df-63bd-4ed4-af21-d24219973188","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in neural information processing systems , volume=","work_id":"d428ff68-61a7-4024-83b4-d1b47a2ad468","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Measuring mathematical problem solving with the math dataset , author =","work_id":"743e28cf-6d1f-4301-a1ce-15d28821dd87","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Math Olympiad Hardness Scale (MOHS) , author =","work_id":"b6a79b50-e6c0-412d-b3e0-21e76a92e058","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":32,"snapshot_sha256":"e78a26e92f3862c6e21b005e21fbe7111a5555f6855b742f0ac72d69ddaaadc9","internal_anchors":2},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"04ce5a15-2b12-4a1a-bc37-462ab30495b4"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bjf31uXaxm+FSEFBrY9sETUlgia4cE9Qy3qEKyt+7VS1SEBb9eti3SqHIJC9IYIf54xhhrwch/sGPPE215HJCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T02:53:04.019928Z"},"content_sha256":"6896da89cc00d2a75a559296471913a9fc0f685463db802c35dae4abade4686e","schema_version":"1.0","event_id":"sha256:6896da89cc00d2a75a559296471913a9fc0f685463db802c35dae4abade4686e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O/bundle.json","state_url":"https://pith.science/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T02:53:04Z","links":{"resolver":"https://pith.science/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O","bundle":"https://pith.science/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O/bundle.json","state":"https://pith.science/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LAMQRSQMP6D7WJSHDMY7D6RD5O/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:LAMQRSQMP6D7WJSHDMY7D6RD5O","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"15b96d2206b7385c6251113cf6382dd4dfd673d0492d321994064f46348f4c80","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","title_canon_sha256":"e39b5e54a321b6dd2a2dcd2586cbb61b3fd68e79c2758b8eeaa45692171d911f"},"schema_version":"1.0","source":{"id":"2411.04872","kind":"arxiv","version":7}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2411.04872","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2411.04872v7","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2411.04872","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"LAMQRSQMP6D7","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LAMQRSQMP6D7WJSH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LAMQRSQM","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:6896da89cc00d2a75a559296471913a9fc0f685463db802c35dae4abade4686e","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Current state-of-the-art AI models solve under 2% of problems, revealing a vast gap between AI capabilities and the prowess of the mathematical community."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The problems are genuinely original and unpublished with no data contamination risk, and automated verification reliably measures true mathematical reasoning ability."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"FrontierMath is a new benchmark of hundreds of original hard math problems that current AI models solve less than 2% of."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems."}],"snapshot_sha256":"d1533291bb17f4fcdd10470af2ef410e3853ceefb5e3d0db3efc952c906a1845"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce FrontierMath, a benchmark of hundreds of original, exceptionally challenging mathematics problems crafted and vetted by expert mathematicians. The questions cover most major branches of modern mathematics -- from computationally intensive problems in number theory and real analysis to abstract questions in algebraic geometry and category theory. Solving a typical problem requires multiple hours of effort from a researcher in the relevant branch of mathematics, and for the upper end questions, multiple days. FrontierMath uses new, unpublished problems and automated verification to ","authors_text":"Alex Gunning, Anson Ho, Bogdan Grechuk, Caroline Falkman Olsson, Diego Chicharro, Ege Erdil, Elizabeth Pratt, Elliot Glazer, Emily de Oliveira Santos, Evan Chen, Grant Barkley, Jaime Sevilla, Jean-Stanislas Denain, Lionel Levine, Mark Wildon, Matej Vrzala, Matthew Barnett, Natalie Stewart, Olli J\\\"arviniemi, Qiuyu Ren, Robert Sandler, Shreepranav Varma Enugandla, Tamay Besiroglu, Tetiana Grechuk","cross_cats":[],"headline":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","title":"FrontierMath: A Benchmark for Evaluating Advanced Mathematical Reasoning in AI"},"references":{"count":32,"internal_anchors":2,"resolved_work":32,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"MSC2020 Mathematics Subject Classification System , author =","work_id":"5e2dc496-fa18-47d5-a4ac-19f19351c418","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Training verifiers to solve math word problems, 2021 , author =","work_id":"877a76df-63bd-4ed4-af21-d24219973188","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Advances in neural information processing systems , volume=","work_id":"d428ff68-61a7-4024-83b4-d1b47a2ad468","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Measuring mathematical problem solving with the math dataset , author =","work_id":"743e28cf-6d1f-4301-a1ce-15d28821dd87","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Math Olympiad Hardness Scale (MOHS) , author =","work_id":"b6a79b50-e6c0-412d-b3e0-21e76a92e058","year":null}],"snapshot_sha256":"e78a26e92f3862c6e21b005e21fbe7111a5555f6855b742f0ac72d69ddaaadc9"},"source":{"id":"2411.04872","kind":"arxiv","version":7},"verdict":{"created_at":"2026-05-17T00:37:28.338908Z","id":"04ce5a15-2b12-4a1a-bc37-462ab30495b4","model_set":{"reader":"grok-4.3"},"one_line_summary":"FrontierMath is a new benchmark of hundreds of original hard math problems that current AI models solve less than 2% of.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"FrontierMath shows that current AI models solve under 2% of hundreds of original expert-level mathematics problems.","strongest_claim":"Current state-of-the-art AI models solve under 2% of problems, revealing a vast gap between AI capabilities and the prowess of the mathematical community.","weakest_assumption":"The problems are genuinely original and unpublished with no data contamination risk, and automated verification reliably measures true mathematical reasoning ability."}},"verdict_id":"04ce5a15-2b12-4a1a-bc37-462ab30495b4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1f6c700a0a25ba26f45fafa3856709fe046048d9bc84278abe3cd04ec5c20614","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"15b96d2206b7385c6251113cf6382dd4dfd673d0492d321994064f46348f4c80","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-11-07T17:07:35Z","title_canon_sha256":"e39b5e54a321b6dd2a2dcd2586cbb61b3fd68e79c2758b8eeaa45692171d911f"},"schema_version":"1.0","source":{"id":"2411.04872","kind":"arxiv","version":7}},"canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"581908ca0c7f87fb26471b31f1fa23eb8f8f8f1f751e14f32836153128aaaeec","first_computed_at":"2026-05-17T23:38:46.078189Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.078189Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OcYZqz0Zp63FbZvmrI5ZapnieqCfz6D3dG/R6AyMxLeK+Y5JzoS2+yKTnx8/fLdolDnn1I3a7ZfUBilJuH2RBg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.078731Z","signed_message":"canonical_sha256_bytes"},"source_id":"2411.04872","source_kind":"arxiv","source_version":7}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1f6c700a0a25ba26f45fafa3856709fe046048d9bc84278abe3cd04ec5c20614","sha256:6896da89cc00d2a75a559296471913a9fc0f685463db802c35dae4abade4686e"],"state_sha256":"3e5bcb7153abb3f40fc087fd317a602403b11676df83aaf07dd47638a69b7924"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aaxTIqhQo0IgkmpOlN4e3l+BFO77SHlNHwXjqBI4q3dSqx20I7Hzr0zj6cpmz9O3+IJrrhmMsaUJac0l5t3pCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T02:53:04.023526Z","bundle_sha256":"7b6d0c69945d9b7e9710dc2fb9c2b29267c36312c3fe72f64c58ff7c64d8bf65"}}