{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:I3E5CVLS2QK4GJWAMLULO3R7YS","short_pith_number":"pith:I3E5CVLS","schema_version":"1.0","canonical_sha256":"46c9d15572d415c326c062e8b76e3fc494b496916f05d0219ae20390483f49b7","source":{"kind":"arxiv","id":"2605.22074","version":1},"attestation_state":"computed","paper":{"title":"From Reasoning Chains to Verifiable Subproblems: Curriculum Reinforcement Learning Enables Credit Assignment for LLM Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Gao Huang, Shenzhi Wang, Wenze Lin, Xitai Jiang, Yang Yue, Zihan Tang","submitted_at":"2026-05-21T07:13:00Z","abstract_excerpt":"Reinforcement learning from verifiable rewards (RLVR) has shown strong promise for LLM reasoning, but outcome-based RLVR remains inefficient on hard problems because correct final-answer rollouts are rare and sample-level credit assignment cannot use partial progress in failed attempts. We introduce SCRL (Subproblem Curriculum Reinforcement Learning), a curriculum RL framework that derives verifiable subproblems from reference reasoning chains and fixes the final subproblem as the original problem. This turns partial progress on hard problems into verifiable learning signals. Algorithmically, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.22074","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T07:13:00Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"315e0b7313d8690b67e902702c8ce381933d0a3fd0b7ea8b97016dfb7bd596dd","abstract_canon_sha256":"2277cc68065894d5cfa8a01d4903fdbc3edefb30b6881079fd5d1e67cfd68f89"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:23.934288Z","signature_b64":"8OG8zSa/aBNs1dulxL0Gh+0mPEskwhsM3VYyquZxEJ/JM8D45sD+D9OCmkveQzrPWmY/uHcgLf+aX40ahZnNDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"46c9d15572d415c326c062e8b76e3fc494b496916f05d0219ae20390483f49b7","last_reissued_at":"2026-05-22T01:04:23.933450Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:23.933450Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"From Reasoning Chains to Verifiable Subproblems: Curriculum Reinforcement Learning Enables Credit Assignment for LLM Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Gao Huang, Shenzhi Wang, Wenze Lin, Xitai Jiang, Yang Yue, Zihan Tang","submitted_at":"2026-05-21T07:13:00Z","abstract_excerpt":"Reinforcement learning from verifiable rewards (RLVR) has shown strong promise for LLM reasoning, but outcome-based RLVR remains inefficient on hard problems because correct final-answer rollouts are rare and sample-level credit assignment cannot use partial progress in failed attempts. We introduce SCRL (Subproblem Curriculum Reinforcement Learning), a curriculum RL framework that derives verifiable subproblems from reference reasoning chains and fixes the final subproblem as the original problem. This turns partial progress on hard problems into verifiable learning signals. Algorithmically, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22074","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.22074/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.22074","created_at":"2026-05-22T01:04:23.933585+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.22074v1","created_at":"2026-05-22T01:04:23.933585+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22074","created_at":"2026-05-22T01:04:23.933585+00:00"},{"alias_kind":"pith_short_12","alias_value":"I3E5CVLS2QK4","created_at":"2026-05-22T01:04:23.933585+00:00"},{"alias_kind":"pith_short_16","alias_value":"I3E5CVLS2QK4GJWA","created_at":"2026-05-22T01:04:23.933585+00:00"},{"alias_kind":"pith_short_8","alias_value":"I3E5CVLS","created_at":"2026-05-22T01:04:23.933585+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS","json":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS.json","graph_json":"https://pith.science/api/pith-number/I3E5CVLS2QK4GJWAMLULO3R7YS/graph.json","events_json":"https://pith.science/api/pith-number/I3E5CVLS2QK4GJWAMLULO3R7YS/events.json","paper":"https://pith.science/paper/I3E5CVLS"},"agent_actions":{"view_html":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS","download_json":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS.json","view_paper":"https://pith.science/paper/I3E5CVLS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.22074&json=true","fetch_graph":"https://pith.science/api/pith-number/I3E5CVLS2QK4GJWAMLULO3R7YS/graph.json","fetch_events":"https://pith.science/api/pith-number/I3E5CVLS2QK4GJWAMLULO3R7YS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS/action/storage_attestation","attest_author":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS/action/author_attestation","sign_citation":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS/action/citation_signature","submit_replication":"https://pith.science/pith/I3E5CVLS2QK4GJWAMLULO3R7YS/action/replication_record"}},"created_at":"2026-05-22T01:04:23.933585+00:00","updated_at":"2026-05-22T01:04:23.933585+00:00"}