{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:4CBGIT3DTRJ4OSJTIKD5RLVKYP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e34a91beb2e2e155022b324febc212b54dc86d771a45391f2d4bdb223aba9e8f","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2024-06-26T17:43:06Z","title_canon_sha256":"3667cac0c5711d844d8a067790368dd9a16a04a1bb68844fa92baebc08c4f174"},"schema_version":"1.0","source":{"id":"2406.18629","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.18629","created_at":"2026-05-18T23:52:39Z"},{"alias_kind":"arxiv_version","alias_value":"2406.18629v1","created_at":"2026-05-18T23:52:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.18629","created_at":"2026-05-18T23:52:39Z"},{"alias_kind":"pith_short_12","alias_value":"4CBGIT3DTRJ4","created_at":"2026-05-18T23:52:39Z"},{"alias_kind":"pith_short_16","alias_value":"4CBGIT3DTRJ4OSJT","created_at":"2026-05-18T23:52:39Z"},{"alias_kind":"pith_short_8","alias_value":"4CBGIT3D","created_at":"2026-05-18T23:52:39Z"}],"graph_snapshots":[{"event_id":"sha256:e711525780bcd8c932fd448d8f0a2c3ff82c36f824f29fe397e19897fad01efe","target":"graph","created_at":"2026-05-18T23:52:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Mathematical reasoning presents a significant challenge for Large Language Models (LLMs) due to the extensive and precise chain of reasoning required for accuracy. Ensuring the correctness of each reasoning step is critical. To address this, we aim to enhance the robustness and factuality of LLMs by learning from human feedback. However, Direct Preference Optimization (DPO) has shown limited benefits for long-chain mathematical reasoning, as models employing DPO struggle to identify detailed errors in incorrect answers. This limitation stems from a lack of fine-grained process supervision. We ","authors_text":"Jiaya Jia, Senqiao Yang, Xiangru Peng, Xin Lai, Yukang Chen, Zhuotao Tian","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2024-06-26T17:43:06Z","title":"Step-DPO: Step-wise Preference Optimization for Long-chain Reasoning of LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2406.18629","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b46535ea2eebe635cc059a6743f30e186882ec946ae125034b8f956921be7e4b","target":"record","created_at":"2026-05-18T23:52:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e34a91beb2e2e155022b324febc212b54dc86d771a45391f2d4bdb223aba9e8f","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2024-06-26T17:43:06Z","title_canon_sha256":"3667cac0c5711d844d8a067790368dd9a16a04a1bb68844fa92baebc08c4f174"},"schema_version":"1.0","source":{"id":"2406.18629","kind":"arxiv","version":1}},"canonical_sha256":"e082644f639c53c749334287d8aeaac3fe23604b9d09cf29130161f2f0868101","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e082644f639c53c749334287d8aeaac3fe23604b9d09cf29130161f2f0868101","first_computed_at":"2026-05-18T23:52:39.414688Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T23:52:39.414688Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qkRzdaQdyxah0ln/iAcGZgJlaKdvBM9NhR5P09814uDbAHQW/PGthaemHD/g6arILYk7XAK9u26P21H4o2T3CA==","signature_status":"signed_v1","signed_at":"2026-05-18T23:52:39.417639Z","signed_message":"canonical_sha256_bytes"},"source_id":"2406.18629","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b46535ea2eebe635cc059a6743f30e186882ec946ae125034b8f956921be7e4b","sha256:e711525780bcd8c932fd448d8f0a2c3ff82c36f824f29fe397e19897fad01efe"],"state_sha256":"ba1a0d7487c0b1802f40ec5deed0dada7b9ce50e102862b9724398490d446488"}