{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:Y724IUS3R43ZC5N6FQA2E34LPF","short_pith_number":"pith:Y724IUS3","canonical_record":{"source":{"id":"2504.01296","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-04-02T01:59:26Z","cross_cats_sorted":[],"title_canon_sha256":"8ac750f31e605e586bf46d52ef3b42890890427fa911befb0f7ce9074a4bcba2","abstract_canon_sha256":"ac003bb650b4416d87af90a5efc31ec072ca7afd6947d4adfcc624afa51623a6"},"schema_version":"1.0"},"canonical_sha256":"c7f5c4525b8f379175be2c01a26f8b797279fcbc3c81d5a12b78a0690de43b95","source":{"kind":"arxiv","id":"2504.01296","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2504.01296","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2504.01296v1","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.01296","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"Y724IUS3R43Z","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Y724IUS3R43ZC5N6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Y724IUS3","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:Y724IUS3R43ZC5N6FQA2E34LPF","target":"record","payload":{"canonical_record":{"source":{"id":"2504.01296","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-04-02T01:59:26Z","cross_cats_sorted":[],"title_canon_sha256":"8ac750f31e605e586bf46d52ef3b42890890427fa911befb0f7ce9074a4bcba2","abstract_canon_sha256":"ac003bb650b4416d87af90a5efc31ec072ca7afd6947d4adfcc624afa51623a6"},"schema_version":"1.0"},"canonical_sha256":"c7f5c4525b8f379175be2c01a26f8b797279fcbc3c81d5a12b78a0690de43b95","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:14.733747Z","signature_b64":"vG5+DsBNtixFjkmuZGbrdOp6/XGveNd/3TNoECxW6bwuXnZVMwpsJrRJ3FTQINtt23C3u/qG7Q+rPjy22CvkCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7f5c4525b8f379175be2c01a26f8b797279fcbc3c81d5a12b78a0690de43b95","last_reissued_at":"2026-05-17T23:38:14.733060Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:14.733060Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2504.01296","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GmZaCwQ7Z3iieWSe42H4fyThKEDlHvO6gueHWyu7WB7noom0SjT0oxRdX1jgBQ2V9KAF/CM3PJ2sMx9DUbhYDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T20:27:03.920585Z"},"content_sha256":"0ede29797b1aa08e9946ac6c568279bc179017fc9dbd88b9ca3ba18d955362ef","schema_version":"1.0","event_id":"sha256:0ede29797b1aa08e9946ac6c568279bc179017fc9dbd88b9ca3ba18d955362ef"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:Y724IUS3R43ZC5N6FQA2E34LPF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ThinkPrune: Pruning Long Chain-of-Thought of LLMs via Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Reinforcement learning with token limits can cut LLM chain-of-thought length in half while dropping accuracy by only two percent on math benchmarks.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bairu Hou, Jacob Andreas, Jiabao Ji, Kaizhi Qian, Shiyu Chang, Yang Zhang, Yujian Liu","submitted_at":"2025-04-02T01:59:26Z","abstract_excerpt":"We present ThinkPrune, a simple yet effective method for pruning the thinking length for long-thinking LLMs, which has been found to often produce inefficient and redundant thinking processes. Existing preliminary explorations of reducing thinking length primarily focus on forcing the thinking process to early exit, rather than adapting the LLM to optimize and consolidate the thinking process, and therefore the length-performance tradeoff observed so far is sub-optimal. To fill this gap, ThinkPrune offers a simple solution that continuously trains the long-thinking LLMs via reinforcement learn"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"ThinkPrune results in a remarkable performance-length tradeoff -- on the AIME24 dataset, the reasoning length of DeepSeek-R1-Distill-Qwen-1.5B can be reduced by half with only 2% drop in performance.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the RL objective with the added token-limit penalty will converge to a policy that preserves core reasoning capability rather than learning superficial shortcuts that only work on the training distribution.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ThinkPrune halves reasoning length on AIME24 for DeepSeek-R1-Distill-Qwen-1.5B with only 2% performance drop by applying iterative RL under token limits.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Reinforcement learning with token limits can cut LLM chain-of-thought length in half while dropping accuracy by only two percent on math benchmarks.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"6edb23556a7e21cd27f4951a96f64fd2f1f7206d8f38e0c7432a0821528d78cb"},"source":{"id":"2504.01296","kind":"arxiv","version":1},"verdict":{"id":"c13209b5-4e8f-4c51-aefe-76d27a1bd202","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T07:11:17.367727Z","strongest_claim":"ThinkPrune results in a remarkable performance-length tradeoff -- on the AIME24 dataset, the reasoning length of DeepSeek-R1-Distill-Qwen-1.5B can be reduced by half with only 2% drop in performance.","one_line_summary":"ThinkPrune halves reasoning length on AIME24 for DeepSeek-R1-Distill-Qwen-1.5B with only 2% performance drop by applying iterative RL under token limits.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the RL objective with the added token-limit penalty will converge to a policy that preserves core reasoning capability rather than learning superficial shortcuts that only work on the training distribution.","pith_extraction_headline":"Reinforcement learning with token limits can cut LLM chain-of-thought length in half while dropping accuracy by only two percent on math benchmarks."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4756eb27130b8a59e9b62e464b120b3e5aa2c678d970c22064d2f1deb476d005"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"c13209b5-4e8f-4c51-aefe-76d27a1bd202"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nicuVOfEo6tFNUItL5v+xaL8PA5UmkHz0ZFc8wj4bi8Qpdr4NUyJ7d1OKlhgD0XQUUR6EhlRcvB8H883QdOVAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T20:27:03.921390Z"},"content_sha256":"72e0641184e1cc51d6eb2c705aca4334499ec798810d5cf83e7c7613197acdf1","schema_version":"1.0","event_id":"sha256:72e0641184e1cc51d6eb2c705aca4334499ec798810d5cf83e7c7613197acdf1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Y724IUS3R43ZC5N6FQA2E34LPF/bundle.json","state_url":"https://pith.science/pith/Y724IUS3R43ZC5N6FQA2E34LPF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Y724IUS3R43ZC5N6FQA2E34LPF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T20:27:03Z","links":{"resolver":"https://pith.science/pith/Y724IUS3R43ZC5N6FQA2E34LPF","bundle":"https://pith.science/pith/Y724IUS3R43ZC5N6FQA2E34LPF/bundle.json","state":"https://pith.science/pith/Y724IUS3R43ZC5N6FQA2E34LPF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Y724IUS3R43ZC5N6FQA2E34LPF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:Y724IUS3R43ZC5N6FQA2E34LPF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ac003bb650b4416d87af90a5efc31ec072ca7afd6947d4adfcc624afa51623a6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-04-02T01:59:26Z","title_canon_sha256":"8ac750f31e605e586bf46d52ef3b42890890427fa911befb0f7ce9074a4bcba2"},"schema_version":"1.0","source":{"id":"2504.01296","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2504.01296","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2504.01296v1","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.01296","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"Y724IUS3R43Z","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Y724IUS3R43ZC5N6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Y724IUS3","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:72e0641184e1cc51d6eb2c705aca4334499ec798810d5cf83e7c7613197acdf1","target":"graph","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ThinkPrune results in a remarkable performance-length tradeoff -- on the AIME24 dataset, the reasoning length of DeepSeek-R1-Distill-Qwen-1.5B can be reduced by half with only 2% drop in performance."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the RL objective with the added token-limit penalty will converge to a policy that preserves core reasoning capability rather than learning superficial shortcuts that only work on the training distribution."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ThinkPrune halves reasoning length on AIME24 for DeepSeek-R1-Distill-Qwen-1.5B with only 2% performance drop by applying iterative RL under token limits."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning with token limits can cut LLM chain-of-thought length in half while dropping accuracy by only two percent on math benchmarks."}],"snapshot_sha256":"6edb23556a7e21cd27f4951a96f64fd2f1f7206d8f38e0c7432a0821528d78cb"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4756eb27130b8a59e9b62e464b120b3e5aa2c678d970c22064d2f1deb476d005"},"paper":{"abstract_excerpt":"We present ThinkPrune, a simple yet effective method for pruning the thinking length for long-thinking LLMs, which has been found to often produce inefficient and redundant thinking processes. Existing preliminary explorations of reducing thinking length primarily focus on forcing the thinking process to early exit, rather than adapting the LLM to optimize and consolidate the thinking process, and therefore the length-performance tradeoff observed so far is sub-optimal. To fill this gap, ThinkPrune offers a simple solution that continuously trains the long-thinking LLMs via reinforcement learn","authors_text":"Bairu Hou, Jacob Andreas, Jiabao Ji, Kaizhi Qian, Shiyu Chang, Yang Zhang, Yujian Liu","cross_cats":[],"headline":"Reinforcement learning with token limits can cut LLM chain-of-thought length in half while dropping accuracy by only two percent on math benchmarks.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-04-02T01:59:26Z","title":"ThinkPrune: Pruning Long Chain-of-Thought of LLMs via Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2504.01296","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-17T07:11:17.367727Z","id":"c13209b5-4e8f-4c51-aefe-76d27a1bd202","model_set":{"reader":"grok-4.3"},"one_line_summary":"ThinkPrune halves reasoning length on AIME24 for DeepSeek-R1-Distill-Qwen-1.5B with only 2% performance drop by applying iterative RL under token limits.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning with token limits can cut LLM chain-of-thought length in half while dropping accuracy by only two percent on math benchmarks.","strongest_claim":"ThinkPrune results in a remarkable performance-length tradeoff -- on the AIME24 dataset, the reasoning length of DeepSeek-R1-Distill-Qwen-1.5B can be reduced by half with only 2% drop in performance.","weakest_assumption":"That the RL objective with the added token-limit penalty will converge to a policy that preserves core reasoning capability rather than learning superficial shortcuts that only work on the training distribution."}},"verdict_id":"c13209b5-4e8f-4c51-aefe-76d27a1bd202"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0ede29797b1aa08e9946ac6c568279bc179017fc9dbd88b9ca3ba18d955362ef","target":"record","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ac003bb650b4416d87af90a5efc31ec072ca7afd6947d4adfcc624afa51623a6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-04-02T01:59:26Z","title_canon_sha256":"8ac750f31e605e586bf46d52ef3b42890890427fa911befb0f7ce9074a4bcba2"},"schema_version":"1.0","source":{"id":"2504.01296","kind":"arxiv","version":1}},"canonical_sha256":"c7f5c4525b8f379175be2c01a26f8b797279fcbc3c81d5a12b78a0690de43b95","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c7f5c4525b8f379175be2c01a26f8b797279fcbc3c81d5a12b78a0690de43b95","first_computed_at":"2026-05-17T23:38:14.733060Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:14.733060Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"vG5+DsBNtixFjkmuZGbrdOp6/XGveNd/3TNoECxW6bwuXnZVMwpsJrRJ3FTQINtt23C3u/qG7Q+rPjy22CvkCA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:14.733747Z","signed_message":"canonical_sha256_bytes"},"source_id":"2504.01296","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0ede29797b1aa08e9946ac6c568279bc179017fc9dbd88b9ca3ba18d955362ef","sha256:72e0641184e1cc51d6eb2c705aca4334499ec798810d5cf83e7c7613197acdf1"],"state_sha256":"ac5653cb516e45299c79b3d7f90df3672d52b305d6df8bec54d1ee7d039680da"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1qDet/gnIghjLsQhGFqdbC0GULE1MezjVFFQf47AanmCk57R58qsiHHOCTTta5RDXt1YdA8uBK+9rzzHWx8kDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T20:27:03.924980Z","bundle_sha256":"8973085294e98d3e5febb9d01f21d9f686e94c88eb30e4221671054481988895"}}