{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:VVP5JLKRP6R6NKNAFD5MKVPZ27","short_pith_number":"pith:VVP5JLKR","schema_version":"1.0","canonical_sha256":"ad5fd4ad517fa3e6a9a028fac555f9d7f896a0f57a905d372b5c5e6cde013af5","source":{"kind":"arxiv","id":"2512.07843","version":2},"attestation_state":"computed","paper":{"title":"ThreadWeaver: Adaptive Threading for Efficient Parallel Reasoning in Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Adam Yala, Alane Suhr, Felix Juefei-Xu, Long Lian, Sida Wang, Trevor Darrell, Tsu-Jui Fu, Xiuyu Li, Xi Victoria Lin, Yuandong Tian","submitted_at":"2025-11-24T18:55:59Z","abstract_excerpt":"Scaling inference-time computation has enabled Large Language Models (LLMs) to achieve strong reasoning performance, but their inherently sequential decoding incurs substantial latency, motivating parallelization of the generation process. However, existing parallel reasoning approaches suffer from performance degradation compared to their sequential counterparts, and often rely on specialized inference engines. We introduce ThreadWeaver, a framework for adaptive parallel reasoning that matches the accuracy of comparably sized sequential reasoning models while significantly reducing inference "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.07843","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-11-24T18:55:59Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"b4ed3e48a5829a2507ef2101586f4858b583fa7bae716cd20d4a2ccf070e1cf5","abstract_canon_sha256":"d65c89dc229d522440c05b63904b533bbc8fb1a2a0c5cf5c7699252c164c9068"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T00:16:51.614033Z","signature_b64":"xpk2+Je+3WIv6RkabF8e55AkBW55yan0nZn6AH5KVZ1qmV0HF2ta4dQU8KhgQxjOI3WVDAKZbLobqRJBdTsCBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ad5fd4ad517fa3e6a9a028fac555f9d7f896a0f57a905d372b5c5e6cde013af5","last_reissued_at":"2026-07-03T00:16:51.613550Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T00:16:51.613550Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"ThreadWeaver: Adaptive Threading for Efficient Parallel Reasoning in Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Adam Yala, Alane Suhr, Felix Juefei-Xu, Long Lian, Sida Wang, Trevor Darrell, Tsu-Jui Fu, Xiuyu Li, Xi Victoria Lin, Yuandong Tian","submitted_at":"2025-11-24T18:55:59Z","abstract_excerpt":"Scaling inference-time computation has enabled Large Language Models (LLMs) to achieve strong reasoning performance, but their inherently sequential decoding incurs substantial latency, motivating parallelization of the generation process. However, existing parallel reasoning approaches suffer from performance degradation compared to their sequential counterparts, and often rely on specialized inference engines. We introduce ThreadWeaver, a framework for adaptive parallel reasoning that matches the accuracy of comparably sized sequential reasoning models while significantly reducing inference "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.07843","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.07843/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.07843","created_at":"2026-07-03T00:16:51.613608+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.07843v2","created_at":"2026-07-03T00:16:51.613608+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.07843","created_at":"2026-07-03T00:16:51.613608+00:00"},{"alias_kind":"pith_short_12","alias_value":"VVP5JLKRP6R6","created_at":"2026-07-03T00:16:51.613608+00:00"},{"alias_kind":"pith_short_16","alias_value":"VVP5JLKRP6R6NKNA","created_at":"2026-07-03T00:16:51.613608+00:00"},{"alias_kind":"pith_short_8","alias_value":"VVP5JLKR","created_at":"2026-07-03T00:16:51.613608+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2606.31779","citing_title":"Bridging the Gap Between Latent and Explicit Reasoning with Looped Transformers","ref_index":150,"is_internal_anchor":true},{"citing_arxiv_id":"2605.27570","citing_title":"LaneRoPE: Positional Encoding for Collaborative Parallel Reasoning and Generation","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06914","citing_title":"Regulating Branch Parallelism in LLM Serving","ref_index":10,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27","json":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27.json","graph_json":"https://pith.science/api/pith-number/VVP5JLKRP6R6NKNAFD5MKVPZ27/graph.json","events_json":"https://pith.science/api/pith-number/VVP5JLKRP6R6NKNAFD5MKVPZ27/events.json","paper":"https://pith.science/paper/VVP5JLKR"},"agent_actions":{"view_html":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27","download_json":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27.json","view_paper":"https://pith.science/paper/VVP5JLKR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.07843&json=true","fetch_graph":"https://pith.science/api/pith-number/VVP5JLKRP6R6NKNAFD5MKVPZ27/graph.json","fetch_events":"https://pith.science/api/pith-number/VVP5JLKRP6R6NKNAFD5MKVPZ27/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27/action/storage_attestation","attest_author":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27/action/author_attestation","sign_citation":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27/action/citation_signature","submit_replication":"https://pith.science/pith/VVP5JLKRP6R6NKNAFD5MKVPZ27/action/replication_record"}},"created_at":"2026-07-03T00:16:51.613608+00:00","updated_at":"2026-07-03T00:16:51.613608+00:00"}