{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:X2F6UCDI6MEM4W3AFDEXJKQZ7U","short_pith_number":"pith:X2F6UCDI","schema_version":"1.0","canonical_sha256":"be8bea0868f308ce5b6028c974aa19fd0d840be5bb78715dd4d6beab3c27fb4f","source":{"kind":"arxiv","id":"2605.19723","version":1},"attestation_state":"computed","paper":{"title":"Mathematical Reasoning in Large Language Models: Benchmarks, Architectures, Evaluation, and Open Challenges","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Aamir Shahzad, Husnain Amjad, Mehwish Fatima, Raja Khurram Shahzad","submitted_at":"2026-05-19T11:56:03Z","abstract_excerpt":"Mathematical reasoning is essential for problem-solving in education, science, and industry, serving as a crucial benchmark for evaluating artificial intelligence systems. As Large Language Models (LLMs) improve their reasoning capabilities, understanding how well they perform mathematical reasoning has become increasingly important. This survey synthesizes recent advancements in mathematical reasoning with LLMs through a structured analysis of datasets, architectures, training strategies, and evaluation protocols. Our systematic review encompasses approximately 120 peer-reviewed studies and p"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19723","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T11:56:03Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ab1898354c9728b5fd9ffc645441d854e1d6b647b3a4831019f056d6a91306f1","abstract_canon_sha256":"8a8c08d5b678e0fc7e53a13fbd680ca0e41bd91a466b0eb9360249d2fcc40c3c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:06:11.023778Z","signature_b64":"Q5cNSp/1JtJMgUkuz9vUknwO1TrAbxP3pCfY/JX7fTARO8CjKIBQCVRPwoKeUODJZHKqvwLqJdHmYgSQ0tVDDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"be8bea0868f308ce5b6028c974aa19fd0d840be5bb78715dd4d6beab3c27fb4f","last_reissued_at":"2026-05-20T01:06:11.022997Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:06:11.022997Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mathematical Reasoning in Large Language Models: Benchmarks, Architectures, Evaluation, and Open Challenges","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Aamir Shahzad, Husnain Amjad, Mehwish Fatima, Raja Khurram Shahzad","submitted_at":"2026-05-19T11:56:03Z","abstract_excerpt":"Mathematical reasoning is essential for problem-solving in education, science, and industry, serving as a crucial benchmark for evaluating artificial intelligence systems. As Large Language Models (LLMs) improve their reasoning capabilities, understanding how well they perform mathematical reasoning has become increasingly important. This survey synthesizes recent advancements in mathematical reasoning with LLMs through a structured analysis of datasets, architectures, training strategies, and evaluation protocols. Our systematic review encompasses approximately 120 peer-reviewed studies and p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19723","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19723/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19723","created_at":"2026-05-20T01:06:11.023123+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19723v1","created_at":"2026-05-20T01:06:11.023123+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19723","created_at":"2026-05-20T01:06:11.023123+00:00"},{"alias_kind":"pith_short_12","alias_value":"X2F6UCDI6MEM","created_at":"2026-05-20T01:06:11.023123+00:00"},{"alias_kind":"pith_short_16","alias_value":"X2F6UCDI6MEM4W3A","created_at":"2026-05-20T01:06:11.023123+00:00"},{"alias_kind":"pith_short_8","alias_value":"X2F6UCDI","created_at":"2026-05-20T01:06:11.023123+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U","json":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U.json","graph_json":"https://pith.science/api/pith-number/X2F6UCDI6MEM4W3AFDEXJKQZ7U/graph.json","events_json":"https://pith.science/api/pith-number/X2F6UCDI6MEM4W3AFDEXJKQZ7U/events.json","paper":"https://pith.science/paper/X2F6UCDI"},"agent_actions":{"view_html":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U","download_json":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U.json","view_paper":"https://pith.science/paper/X2F6UCDI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19723&json=true","fetch_graph":"https://pith.science/api/pith-number/X2F6UCDI6MEM4W3AFDEXJKQZ7U/graph.json","fetch_events":"https://pith.science/api/pith-number/X2F6UCDI6MEM4W3AFDEXJKQZ7U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U/action/storage_attestation","attest_author":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U/action/author_attestation","sign_citation":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U/action/citation_signature","submit_replication":"https://pith.science/pith/X2F6UCDI6MEM4W3AFDEXJKQZ7U/action/replication_record"}},"created_at":"2026-05-20T01:06:11.023123+00:00","updated_at":"2026-05-20T01:06:11.023123+00:00"}