{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YIYJJ6SUOSRFXPZTEDKCKBY6ZP","short_pith_number":"pith:YIYJJ6SU","schema_version":"1.0","canonical_sha256":"c23094fa5474a25bbf3320d425071ecbdcb4aa9cf91f2bf6a9d1432987e83621","source":{"kind":"arxiv","id":"2606.21972","version":1},"attestation_state":"computed","paper":{"title":"Human vs Machine Mathematical Difficulty on Project Euler: An Experimental Analysis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["math.HO"],"primary_cat":"cs.AI","authors_text":"David Holmes, Johannes Schmitt","submitted_at":"2026-06-20T10:01:18Z","abstract_excerpt":"We study how the effort and success probability of frontier AI systems scale with human difficulty on problems from Project Euler, an online platform of computational mathematics problems. Our dataset, from the MathArena benchmark, consists of 3840 attempts across 50 problems and 26 model configurations, with problem difficulty measured by the site's public human solve times. Motivated by a proposal of Timothy Gowers, we test a power-law relation $t_{\\text{machine}} = a \\cdot t_{\\text{human}}^b$ between generated-token cost per successful answer and human time, and find $b < 1$ for 20 of the 2"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.21972","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-20T10:01:18Z","cross_cats_sorted":["math.HO"],"title_canon_sha256":"9e51c91a506cbca2f2584d985ec71eb3f80f32dca80f57acd98adf23146d7fa8","abstract_canon_sha256":"9ae77db6ca8c6d752e2b168159299f36a39ea5afa2ac9833c73f8ccc2dc2497d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:13:04.657670Z","signature_b64":"G61DSM2jZzH3sYJDUE0RWO/8VfhOiuvni0QVNt2RBiCvNAIEFZ1XOZHHDgS05/b4h+Y/jopVfEK4Z++AEUNzDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c23094fa5474a25bbf3320d425071ecbdcb4aa9cf91f2bf6a9d1432987e83621","last_reissued_at":"2026-06-23T02:13:04.657266Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:13:04.657266Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Human vs Machine Mathematical Difficulty on Project Euler: An Experimental Analysis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["math.HO"],"primary_cat":"cs.AI","authors_text":"David Holmes, Johannes Schmitt","submitted_at":"2026-06-20T10:01:18Z","abstract_excerpt":"We study how the effort and success probability of frontier AI systems scale with human difficulty on problems from Project Euler, an online platform of computational mathematics problems. Our dataset, from the MathArena benchmark, consists of 3840 attempts across 50 problems and 26 model configurations, with problem difficulty measured by the site's public human solve times. Motivated by a proposal of Timothy Gowers, we test a power-law relation $t_{\\text{machine}} = a \\cdot t_{\\text{human}}^b$ between generated-token cost per successful answer and human time, and find $b < 1$ for 20 of the 2"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21972","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21972/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.21972","created_at":"2026-06-23T02:13:04.657335+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.21972v1","created_at":"2026-06-23T02:13:04.657335+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21972","created_at":"2026-06-23T02:13:04.657335+00:00"},{"alias_kind":"pith_short_12","alias_value":"YIYJJ6SUOSRF","created_at":"2026-06-23T02:13:04.657335+00:00"},{"alias_kind":"pith_short_16","alias_value":"YIYJJ6SUOSRFXPZT","created_at":"2026-06-23T02:13:04.657335+00:00"},{"alias_kind":"pith_short_8","alias_value":"YIYJJ6SU","created_at":"2026-06-23T02:13:04.657335+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP","json":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP.json","graph_json":"https://pith.science/api/pith-number/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/graph.json","events_json":"https://pith.science/api/pith-number/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/events.json","paper":"https://pith.science/paper/YIYJJ6SU"},"agent_actions":{"view_html":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP","download_json":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP.json","view_paper":"https://pith.science/paper/YIYJJ6SU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.21972&json=true","fetch_graph":"https://pith.science/api/pith-number/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/graph.json","fetch_events":"https://pith.science/api/pith-number/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/action/storage_attestation","attest_author":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/action/author_attestation","sign_citation":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/action/citation_signature","submit_replication":"https://pith.science/pith/YIYJJ6SUOSRFXPZTEDKCKBY6ZP/action/replication_record"}},"created_at":"2026-06-23T02:13:04.657335+00:00","updated_at":"2026-06-23T02:13:04.657335+00:00"}