{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:PBT2I4KORUUAHC2OC2BVYEPYJJ","short_pith_number":"pith:PBT2I4KO","canonical_record":{"source":{"id":"2501.09686","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-01-16T17:37:58Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"27a29be91192a11f36ffa1b46e5ee199fa483d41b5aac49cfac0e14c1b975c54","abstract_canon_sha256":"282c5a48b28b73fee08160a2e957058b7f8c773d182bcfbe789042d75bb24b76"},"schema_version":"1.0"},"canonical_sha256":"7867a4714e8d28038b4e16835c11f84a74534e244ec7b575293df3293f5be1cf","source":{"kind":"arxiv","id":"2501.09686","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.09686","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2501.09686v3","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.09686","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"PBT2I4KORUUA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PBT2I4KORUUAHC2O","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PBT2I4KO","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:PBT2I4KORUUAHC2OC2BVYEPYJJ","target":"record","payload":{"canonical_record":{"source":{"id":"2501.09686","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-01-16T17:37:58Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"27a29be91192a11f36ffa1b46e5ee199fa483d41b5aac49cfac0e14c1b975c54","abstract_canon_sha256":"282c5a48b28b73fee08160a2e957058b7f8c773d182bcfbe789042d75bb24b76"},"schema_version":"1.0"},"canonical_sha256":"7867a4714e8d28038b4e16835c11f84a74534e244ec7b575293df3293f5be1cf","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:50.132881Z","signature_b64":"ftDai0C/J9skDzStEffjdBlIzA4Sm6g+W8XJcGsuc0le+eDZ41CheHUGLTwcMLgVU4l6pirJ3eBZMkOuNycJCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7867a4714e8d28038b4e16835c11f84a74534e244ec7b575293df3293f5be1cf","last_reissued_at":"2026-05-17T23:38:50.132380Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:50.132380Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2501.09686","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/wCw04b+kk5JmIrvAxxiVLiqzsqr9EjFgWjb5hi4MDL3Zs6lWpEk7FIdgEG+U46k8IRItGVfkp0n0QbZk+LSAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T00:56:23.933693Z"},"content_sha256":"c02a8166772577c659fbd7cab967749f8b14a49f8af99adfb40e287bb555b064","schema_version":"1.0","event_id":"sha256:c02a8166772577c659fbd7cab967749f8b14a49f8af99adfb40e287bb555b064"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:PBT2I4KORUUAHC2OC2BVYEPYJJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards Large Reasoning Models: A Survey of Reinforced Reasoning with Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Reinforcement learning on reasoning trajectories combined with test-time token scaling points toward Large Reasoning Models.","cross_cats":["cs.CL"],"primary_cat":"cs.AI","authors_text":"Chen Gao, Chenyang Shao, Fanjin Meng, Fengli Xu, Jiahui Gong, Jie Feng, Jingwei Wang, Jingyi Wang, Qianyue Hao, Qinglong Yang, Sijian Ren, Tianjian Ouyang, Xiaochong Lan, Xinyuan Hu, Yiwen Song, Yong Li, Yu Li, Yunke Zhang, Yuwei Yan, Zefang Zong","submitted_at":"2025-01-16T17:37:58Z","abstract_excerpt":"Language has long been conceived as an essential tool for human reasoning. The breakthrough of Large Language Models (LLMs) has sparked significant research interest in leveraging these models to tackle complex reasoning tasks. Researchers have moved beyond simple autoregressive token generation by introducing the concept of \"thought\" -- a sequence of tokens representing intermediate steps in the reasoning process. This innovative paradigm enables LLMs' to mimic complex human reasoning processes, such as tree search and reflective thinking. Recently, an emerging trend of learning to reason has"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"The train-time and test-time scaling combined to show a new research frontier -- a path toward Large Reasoning Model. The introduction of OpenAI's o1 series marks a significant milestone in this research direction.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That reinforcement learning applied to reasoning trajectories will reliably expand LLMs' reasoning capacity without introducing systematic biases or hallucinations that are harder to detect than in standard generation.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"The paper surveys reinforced reasoning techniques for LLMs, covering automated data construction, learning-to-reason methods, and test-time scaling as steps toward Large Reasoning Models.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Reinforcement learning on reasoning trajectories combined with test-time token scaling points toward Large Reasoning Models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"cdeed429b10030e6db5fc938354b13e829f6fadd2d7e0fde87283cfcdba44374"},"source":{"id":"2501.09686","kind":"arxiv","version":3},"verdict":{"id":"0d8839db-dc2b-4fe4-b8e1-cd0966467a8e","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T21:17:24.834699Z","strongest_claim":"The train-time and test-time scaling combined to show a new research frontier -- a path toward Large Reasoning Model. The introduction of OpenAI's o1 series marks a significant milestone in this research direction.","one_line_summary":"The paper surveys reinforced reasoning techniques for LLMs, covering automated data construction, learning-to-reason methods, and test-time scaling as steps toward Large Reasoning Models.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That reinforcement learning applied to reasoning trajectories will reliably expand LLMs' reasoning capacity without introducing systematic biases or hallucinations that are harder to detect than in standard generation.","pith_extraction_headline":"Reinforcement learning on reasoning trajectories combined with test-time token scaling points toward Large Reasoning Models."},"references":{"count":202,"sample":[{"doi":"","year":2024,"title":"Phi-4 Technical Report","work_id":"b6274271-7af9-4ee8-993b-ba1ba4205ba8","ref_index":1,"cited_arxiv_id":"2412.08905","is_internal_anchor":true},{"doi":"","year":2023,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","ref_index":2,"cited_arxiv_id":"2303.08774","is_internal_anchor":true},{"doi":"","year":2022,"title":"Do As I Can, Not As I Say: Grounding Language in Robotic Affordances","work_id":"037320f1-b0a9-4cbe-a639-bfb25409ce71","ref_index":3,"cited_arxiv_id":"2204.01691","is_internal_anchor":true},{"doi":"","year":2024,"title":"arXiv preprint arXiv:2402.10571 , year=","work_id":"aca16f09-1a50-46c9-ba22-07ebf249d309","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2019,"title":"Mathqa: Towards interpretable math word problem solving with operation-based formalisms, 2019","work_id":"3dedd8c2-047c-43aa-86ff-2b56d8a0722c","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":202,"snapshot_sha256":"673eed4b00add99e3e5dbaaaa4f06697e3718e5306d34b9426aab43069d108fb","internal_anchors":48},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"0d8839db-dc2b-4fe4-b8e1-cd0966467a8e"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mzYcR6aEkeGm9z5EaACUtYPpN/6Yau9gJLQn+25SRgmjTWE7EqDWNsszwHelYOhWU7NRDkwwjMr7nF5Yf23aCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T00:56:23.934567Z"},"content_sha256":"88935bf242870bf0ebec8e8cf108dc479d6bedb16d183e2e47537d34f79703b5","schema_version":"1.0","event_id":"sha256:88935bf242870bf0ebec8e8cf108dc479d6bedb16d183e2e47537d34f79703b5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ/bundle.json","state_url":"https://pith.science/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T00:56:23Z","links":{"resolver":"https://pith.science/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ","bundle":"https://pith.science/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ/bundle.json","state":"https://pith.science/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PBT2I4KORUUAHC2OC2BVYEPYJJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:PBT2I4KORUUAHC2OC2BVYEPYJJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"282c5a48b28b73fee08160a2e957058b7f8c773d182bcfbe789042d75bb24b76","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-01-16T17:37:58Z","title_canon_sha256":"27a29be91192a11f36ffa1b46e5ee199fa483d41b5aac49cfac0e14c1b975c54"},"schema_version":"1.0","source":{"id":"2501.09686","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.09686","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2501.09686v3","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.09686","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"PBT2I4KORUUA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PBT2I4KORUUAHC2O","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PBT2I4KO","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:88935bf242870bf0ebec8e8cf108dc479d6bedb16d183e2e47537d34f79703b5","target":"graph","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"The train-time and test-time scaling combined to show a new research frontier -- a path toward Large Reasoning Model. The introduction of OpenAI's o1 series marks a significant milestone in this research direction."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That reinforcement learning applied to reasoning trajectories will reliably expand LLMs' reasoning capacity without introducing systematic biases or hallucinations that are harder to detect than in standard generation."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"The paper surveys reinforced reasoning techniques for LLMs, covering automated data construction, learning-to-reason methods, and test-time scaling as steps toward Large Reasoning Models."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning on reasoning trajectories combined with test-time token scaling points toward Large Reasoning Models."}],"snapshot_sha256":"cdeed429b10030e6db5fc938354b13e829f6fadd2d7e0fde87283cfcdba44374"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Language has long been conceived as an essential tool for human reasoning. The breakthrough of Large Language Models (LLMs) has sparked significant research interest in leveraging these models to tackle complex reasoning tasks. Researchers have moved beyond simple autoregressive token generation by introducing the concept of \"thought\" -- a sequence of tokens representing intermediate steps in the reasoning process. This innovative paradigm enables LLMs' to mimic complex human reasoning processes, such as tree search and reflective thinking. Recently, an emerging trend of learning to reason has","authors_text":"Chen Gao, Chenyang Shao, Fanjin Meng, Fengli Xu, Jiahui Gong, Jie Feng, Jingwei Wang, Jingyi Wang, Qianyue Hao, Qinglong Yang, Sijian Ren, Tianjian Ouyang, Xiaochong Lan, Xinyuan Hu, Yiwen Song, Yong Li, Yu Li, Yunke Zhang, Yuwei Yan, Zefang Zong","cross_cats":["cs.CL"],"headline":"Reinforcement learning on reasoning trajectories combined with test-time token scaling points toward Large Reasoning Models.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-01-16T17:37:58Z","title":"Towards Large Reasoning Models: A Survey of Reinforced Reasoning with Large Language Models"},"references":{"count":202,"internal_anchors":48,"resolved_work":202,"sample":[{"cited_arxiv_id":"2412.08905","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Phi-4 Technical Report","work_id":"b6274271-7af9-4ee8-993b-ba1ba4205ba8","year":2024},{"cited_arxiv_id":"2303.08774","doi":"","is_internal_anchor":true,"ref_index":2,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","year":2023},{"cited_arxiv_id":"2204.01691","doi":"","is_internal_anchor":true,"ref_index":3,"title":"Do As I Can, Not As I Say: Grounding Language in Robotic Affordances","work_id":"037320f1-b0a9-4cbe-a639-bfb25409ce71","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"arXiv preprint arXiv:2402.10571 , year=","work_id":"aca16f09-1a50-46c9-ba22-07ebf249d309","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Mathqa: Towards interpretable math word problem solving with operation-based formalisms, 2019","work_id":"3dedd8c2-047c-43aa-86ff-2b56d8a0722c","year":2019}],"snapshot_sha256":"673eed4b00add99e3e5dbaaaa4f06697e3718e5306d34b9426aab43069d108fb"},"source":{"id":"2501.09686","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-15T21:17:24.834699Z","id":"0d8839db-dc2b-4fe4-b8e1-cd0966467a8e","model_set":{"reader":"grok-4.3"},"one_line_summary":"The paper surveys reinforced reasoning techniques for LLMs, covering automated data construction, learning-to-reason methods, and test-time scaling as steps toward Large Reasoning Models.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning on reasoning trajectories combined with test-time token scaling points toward Large Reasoning Models.","strongest_claim":"The train-time and test-time scaling combined to show a new research frontier -- a path toward Large Reasoning Model. The introduction of OpenAI's o1 series marks a significant milestone in this research direction.","weakest_assumption":"That reinforcement learning applied to reasoning trajectories will reliably expand LLMs' reasoning capacity without introducing systematic biases or hallucinations that are harder to detect than in standard generation."}},"verdict_id":"0d8839db-dc2b-4fe4-b8e1-cd0966467a8e"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c02a8166772577c659fbd7cab967749f8b14a49f8af99adfb40e287bb555b064","target":"record","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"282c5a48b28b73fee08160a2e957058b7f8c773d182bcfbe789042d75bb24b76","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-01-16T17:37:58Z","title_canon_sha256":"27a29be91192a11f36ffa1b46e5ee199fa483d41b5aac49cfac0e14c1b975c54"},"schema_version":"1.0","source":{"id":"2501.09686","kind":"arxiv","version":3}},"canonical_sha256":"7867a4714e8d28038b4e16835c11f84a74534e244ec7b575293df3293f5be1cf","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7867a4714e8d28038b4e16835c11f84a74534e244ec7b575293df3293f5be1cf","first_computed_at":"2026-05-17T23:38:50.132380Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:50.132380Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ftDai0C/J9skDzStEffjdBlIzA4Sm6g+W8XJcGsuc0le+eDZ41CheHUGLTwcMLgVU4l6pirJ3eBZMkOuNycJCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:50.132881Z","signed_message":"canonical_sha256_bytes"},"source_id":"2501.09686","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c02a8166772577c659fbd7cab967749f8b14a49f8af99adfb40e287bb555b064","sha256:88935bf242870bf0ebec8e8cf108dc479d6bedb16d183e2e47537d34f79703b5"],"state_sha256":"c2b20d999d2a8355c4a7bbbf242152ffa2ab5d44e723e14e65dd49bc914aa234"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6YncXmvn4ypA5jAqdl1x0nw6DTrMgoIZ9q1/cRhPMbcZ/+masqOcg+QxE4fzjH3DoCW6xF6JO6KGgvivEa6TBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T00:56:23.937649Z","bundle_sha256":"326c4764e3216c61e2dade8e4ad1487444da1bd632ef580cdac6d4db082111f6"}}