{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BNMDKKCE6ACDCGB24WV6QR2UHK","short_pith_number":"pith:BNMDKKCE","schema_version":"1.0","canonical_sha256":"0b58352844f00431183ae5abe847543aa8904888c20febfc08d92574fc77f68a","source":{"kind":"arxiv","id":"2605.03344","version":2},"attestation_state":"computed","paper":{"title":"RAG over Thinking Traces Can Improve Reasoning Tasks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Retrieving thinking traces from problem-solving attempts improves reasoning performance on math and code benchmarks.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.IR","authors_text":"Matei Zaharia, Negar Arabzadeh, Sewon Min, Wenjie Ma","submitted_at":"2026-05-05T04:03:28Z","abstract_excerpt":"Retrieval-augmented generation (RAG) has proven effective for knowledge-intensive tasks, but is widely believed to offer limited benefit for reasoning-intensive problems such as math and code generation. We challenge this assumption by showing that the limitation lies not in RAG itself, but in the choice of corpus. Instead of retrieving documents, we propose retrieving thinking traces, i.e., intermediate thinking trajectories generated during problem solving attempts. We show that thinking traces are already a strong retrieval source, and further introduce T3, an offline method that transforms"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.03344","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-05T04:03:28Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"5e890a4c40583b848108dcf231ec24392004f1045164a61c171a89be9de09ef2","abstract_canon_sha256":"bfbff81563853940b3c3a52b262a2bcd53b55bdf33a5b2c13ec22862c7c421c8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:08:36.134386Z","signature_b64":"UqfIih2xiAxwYGeV6Df4ENzZaP/9hv1XomNF5tJOeRmWyN5pQpQ0KcK7La8/0Vo+/9ZdUhMjqHc+N7Q3dOPVAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0b58352844f00431183ae5abe847543aa8904888c20febfc08d92574fc77f68a","last_reissued_at":"2026-06-10T01:08:36.133373Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:08:36.133373Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RAG over Thinking Traces Can Improve Reasoning Tasks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Retrieving thinking traces from problem-solving attempts improves reasoning performance on math and code benchmarks.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.IR","authors_text":"Matei Zaharia, Negar Arabzadeh, Sewon Min, Wenjie Ma","submitted_at":"2026-05-05T04:03:28Z","abstract_excerpt":"Retrieval-augmented generation (RAG) has proven effective for knowledge-intensive tasks, but is widely believed to offer limited benefit for reasoning-intensive problems such as math and code generation. We challenge this assumption by showing that the limitation lies not in RAG itself, but in the choice of corpus. Instead of retrieving documents, we propose retrieving thinking traces, i.e., intermediate thinking trajectories generated during problem solving attempts. We show that thinking traces are already a strong retrieval source, and further introduce T3, an offline method that transforms"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Using these traces as a corpus, a simple retrieve-then-generate pipeline consistently improves reasoning performance across strong models and benchmarks such as AIME 2025--2026, LiveCodeBench, and GPQA-Diamond, outperforming both non-RAG baselines and retrieval over standard web corpora.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That thinking traces generated during problem-solving attempts contain generalizable, high-quality reasoning signals that transfer usefully to new problems and different models without introducing systematic errors or biases from the trace-generation process itself.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"RAG over structured thinking traces boosts LLM reasoning on AIME, LiveCodeBench, and GPQA, with relative gains up to 56% and little added cost.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Retrieving thinking traces from problem-solving attempts improves reasoning performance on math and code benchmarks.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"afb9248f274c2dff62aecc0246ad0e02c214c9762d417c5378d54df38e35aeb8"},"source":{"id":"2605.03344","kind":"arxiv","version":2},"verdict":{"id":"3c141420-5174-488a-ac11-861bf6d73167","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-07T14:29:33.071742Z","strongest_claim":"Using these traces as a corpus, a simple retrieve-then-generate pipeline consistently improves reasoning performance across strong models and benchmarks such as AIME 2025--2026, LiveCodeBench, and GPQA-Diamond, outperforming both non-RAG baselines and retrieval over standard web corpora.","one_line_summary":"RAG over structured thinking traces boosts LLM reasoning on AIME, LiveCodeBench, and GPQA, with relative gains up to 56% and little added cost.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That thinking traces generated during problem-solving attempts contain generalizable, high-quality reasoning signals that transfer usefully to new problems and different models without introducing systematic errors or biases from the trace-generation process itself.","pith_extraction_headline":"Retrieving thinking traces from problem-solving attempts improves reasoning performance on math and code benchmarks."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.03344/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-20T14:33:53.643475Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-20T01:31:21.347325Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T15:28:07.283998Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"e8cf0726fe47c89e7bac061ed33fb057f29857783c3e2c7feff652dc0e575c36"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.03344","created_at":"2026-06-10T01:08:36.133518+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.03344v2","created_at":"2026-06-10T01:08:36.133518+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.03344","created_at":"2026-06-10T01:08:36.133518+00:00"},{"alias_kind":"pith_short_12","alias_value":"BNMDKKCE6ACD","created_at":"2026-06-10T01:08:36.133518+00:00"},{"alias_kind":"pith_short_16","alias_value":"BNMDKKCE6ACDCGB2","created_at":"2026-06-10T01:08:36.133518+00:00"},{"alias_kind":"pith_short_8","alias_value":"BNMDKKCE","created_at":"2026-06-10T01:08:36.133518+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.27361","citing_title":"Natural Language Query to Configuration for Retrieval Agents","ref_index":2,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK","json":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK.json","graph_json":"https://pith.science/api/pith-number/BNMDKKCE6ACDCGB24WV6QR2UHK/graph.json","events_json":"https://pith.science/api/pith-number/BNMDKKCE6ACDCGB24WV6QR2UHK/events.json","paper":"https://pith.science/paper/BNMDKKCE"},"agent_actions":{"view_html":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK","download_json":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK.json","view_paper":"https://pith.science/paper/BNMDKKCE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.03344&json=true","fetch_graph":"https://pith.science/api/pith-number/BNMDKKCE6ACDCGB24WV6QR2UHK/graph.json","fetch_events":"https://pith.science/api/pith-number/BNMDKKCE6ACDCGB24WV6QR2UHK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK/action/storage_attestation","attest_author":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK/action/author_attestation","sign_citation":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK/action/citation_signature","submit_replication":"https://pith.science/pith/BNMDKKCE6ACDCGB24WV6QR2UHK/action/replication_record"}},"created_at":"2026-06-10T01:08:36.133518+00:00","updated_at":"2026-06-10T01:08:36.133518+00:00"}