{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:IS2Y2FVNQ5CACKYP7PUWAEPTDP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e59522c92d3b0f71aafdef1fc393fd60031cc735838a4baa4cae25ef063974e1","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-03-25T09:00:58Z","title_canon_sha256":"bf05ce1fc3a58133438a96c34ba9f399e45a1ef5ac857af372a738e3eca2b82e"},"schema_version":"1.0","source":{"id":"2503.19470","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2503.19470","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"arxiv_version","alias_value":"2503.19470v3","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.19470","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"pith_short_12","alias_value":"IS2Y2FVNQ5CA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"IS2Y2FVNQ5CACKYP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"IS2Y2FVN","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:bf12aa73654cb42323849e7402aacffaa17fe5cd7f6a92aed006fc1b068dac6d","target":"graph","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our approach treats search operations as integral components of the reasoning chain, where when and how to perform searches is guided by text-based thinking, and search results subsequently influence further reasoning. Despite being trained on only one dataset, our models demonstrate strong generalizability across various benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That outcome-based reinforcement learning rewards alone are sufficient to train effective search timing and integration without any supervised reasoning traces or explicit search supervision."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ReSearch trains LLMs via RL to integrate search operations into reasoning steps, achieving strong generalization across benchmarks and eliciting reflection and self-correction without supervised reasoning data."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"ReSearch trains LLMs to interleave search operations with text reasoning using only outcome-based reinforcement learning rewards."}],"snapshot_sha256":"c8b56a6067d5761ab0bff66ab16b184e461c91af3e2d8e271ad996045df8b892"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Large Language Models (LLMs) have shown remarkable capabilities in reasoning, exemplified by the success of OpenAI-o1 and DeepSeek-R1. However, integrating reasoning with external search processes remains challenging, especially for complex multi-hop questions requiring multiple retrieval steps. We propose ReSearch, a novel framework that trains LLMs to Reason with Search via reinforcement learning without using any supervised data on reasoning steps. Our approach treats search operations as integral components of the reasoning chain, where when and how to perform searches is guided by text-ba","authors_text":"Chenzheng Zhu, Fan Yang, Haofen Wang, Haoze Sun, Huajun Chen, Jeff Z. Pan, Linzhuang Sun, Mingyang Chen, Tianpeng Li, Weipeng Chen, Wen Zhang, Yijie Zhou, Zenan Zhou","cross_cats":["cs.CL"],"headline":"ReSearch trains LLMs to interleave search operations with text reasoning using only outcome-based reinforcement learning rewards.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-03-25T09:00:58Z","title":"ReSearch: Learning to Reason with Search for LLMs via Reinforcement Learning"},"references":{"count":44,"internal_anchors":14,"resolved_work":44,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Claude 3.7 sonnet and claude code, 2025","work_id":"34f8d3e0-abcb-40d4-8bed-60c970da1f8c","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Self-rag: Learning to retrieve, generate, and critique through self-reflection","work_id":"0a80e610-1315-461d-888b-efcd795f6ac2","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Rq-rag: Learning to refine queries for retrieval augmented generation","work_id":"5d12e4e2-f60c-4c61-aafa-e97863b41380","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Shuang Chen, Kaituo Feng, Hangting Chen, Wenxuan Huang, Dasen Dai, Quanxin Shou, Yunlong Lin, Xiangyu Yue, Shenghua Gao, and Tianyu Pang","work_id":"6c5542cb-0f99-4d92-85f4-f7b97cd42104","year":2024},{"cited_arxiv_id":"2501.12948","doi":"","is_internal_anchor":true,"ref_index":5,"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","year":2025}],"snapshot_sha256":"53932ec0eaf90a414548519649fe63f2f3a2428e592bab4dca2a5e35bdd4c6a7"},"source":{"id":"2503.19470","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-16T15:43:22.448661Z","id":"750f7f73-1453-415f-ad27-daae68bb00a0","model_set":{"reader":"grok-4.3"},"one_line_summary":"ReSearch trains LLMs via RL to integrate search operations into reasoning steps, achieving strong generalization across benchmarks and eliciting reflection and self-correction without supervised reasoning data.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"ReSearch trains LLMs to interleave search operations with text reasoning using only outcome-based reinforcement learning rewards.","strongest_claim":"Our approach treats search operations as integral components of the reasoning chain, where when and how to perform searches is guided by text-based thinking, and search results subsequently influence further reasoning. Despite being trained on only one dataset, our models demonstrate strong generalizability across various benchmarks.","weakest_assumption":"That outcome-based reinforcement learning rewards alone are sufficient to train effective search timing and integration without any supervised reasoning traces or explicit search supervision."}},"verdict_id":"750f7f73-1453-415f-ad27-daae68bb00a0"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:032e9282fa4d450d6c429a96cc530cd4ff8205f9c2478ef84b976fa71d7b5d82","target":"record","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e59522c92d3b0f71aafdef1fc393fd60031cc735838a4baa4cae25ef063974e1","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-03-25T09:00:58Z","title_canon_sha256":"bf05ce1fc3a58133438a96c34ba9f399e45a1ef5ac857af372a738e3eca2b82e"},"schema_version":"1.0","source":{"id":"2503.19470","kind":"arxiv","version":3}},"canonical_sha256":"44b58d16ad8744012b0ffbe96011f31bf53e5dbcc3f713af70316ef5b8f3a5f0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"44b58d16ad8744012b0ffbe96011f31bf53e5dbcc3f713af70316ef5b8f3a5f0","first_computed_at":"2026-05-17T23:38:47.395691Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:47.395691Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gNaEZn37FB16GUlOxdOlzWgPrbPYccnsUgYL6e6SX+nU/X4SMxCZ+HB8dHDSh8NDOpWG/9BSPoLPSzY9DmjUDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:47.396197Z","signed_message":"canonical_sha256_bytes"},"source_id":"2503.19470","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:032e9282fa4d450d6c429a96cc530cd4ff8205f9c2478ef84b976fa71d7b5d82","sha256:bf12aa73654cb42323849e7402aacffaa17fe5cd7f6a92aed006fc1b068dac6d"],"state_sha256":"89135774223bb8b100eb0ad00399ca3058f8c80c514add24aa414d35f3e64ac8"}