{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:W2QNV2A7XOV5DEKZPUCC4VXZLC","short_pith_number":"pith:W2QNV2A7","canonical_record":{"source":{"id":"2605.07501","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T09:37:30Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"0a6b985093118ec25d6a33452d3085c000bfc20123b8a966171b6709100a01e6","abstract_canon_sha256":"ed64316e54cb6d5e79165aced591829a0b149217ff7278987a7acaa20dd0c3b6"},"schema_version":"1.0"},"canonical_sha256":"b6a0dae81fbbabd191597d042e56f95881bcdc5954832c628af53ede889ab99d","source":{"kind":"arxiv","id":"2605.07501","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.07501","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.07501v2","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.07501","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"W2QNV2A7XOV5","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"W2QNV2A7XOV5DEKZ","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"W2QNV2A7","created_at":"2026-05-20T00:03:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:W2QNV2A7XOV5DEKZPUCC4VXZLC","target":"record","payload":{"canonical_record":{"source":{"id":"2605.07501","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T09:37:30Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"0a6b985093118ec25d6a33452d3085c000bfc20123b8a966171b6709100a01e6","abstract_canon_sha256":"ed64316e54cb6d5e79165aced591829a0b149217ff7278987a7acaa20dd0c3b6"},"schema_version":"1.0"},"canonical_sha256":"b6a0dae81fbbabd191597d042e56f95881bcdc5954832c628af53ede889ab99d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:14.957566Z","signature_b64":"bp++u7haAustiYe5kysvaxnmrCxJfGBThHiWeQJJ8uuBRAm2mWT075PyLqfrlC9peFtzyJKE+pZbYEEdOTRBCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b6a0dae81fbbabd191597d042e56f95881bcdc5954832c628af53ede889ab99d","last_reissued_at":"2026-05-20T00:03:14.956712Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:14.956712Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.07501","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Qrlk4LStYsvF2OgaxE6vmGpFJ3ZmRC4pv34deHGwRYbG/YoL/h4B3joc0OK5vVqxAT5v0bs7U65AewAk8xS2AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T18:20:58.711126Z"},"content_sha256":"256fb2b47d2f2b4c2145370f398ed8d99f829f6b63feab259dc817ec20ade074","schema_version":"1.0","event_id":"sha256:256fb2b47d2f2b4c2145370f398ed8d99f829f6b63feab259dc817ec20ade074"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:W2QNV2A7XOV5DEKZPUCC4VXZLC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ExpThink: Experience-Guided Reinforcement Learning for Adaptive Chain-of-Thought Compression","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"ExpThink applies experience-guided rewards and adaptive normalization in reinforcement learning to shorten chain-of-thought reasoning by up to 77% while increasing accuracy.","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Haiwei Wang, Jinchang Luo, Jing Jin, Miaohui Wang, MingQuan Cheng, Tingcheng Bian, Wenyuan Jiang, Yuzhe Zhang","submitted_at":"2026-05-08T09:37:30Z","abstract_excerpt":"Large reasoning models (LRMs) achieve strong performance via extended chain-of-thought (CoT) reasoning, yet suffer from excessive token consumption and high inference latency. Existing reinforcement learning (RL) approaches for CoT compression rely on uniform, static length penalties that neglect model capability dynamics and problem-level difficulty variation. We propose \\textbf{ExpThink}\\xspace, an RL framework that addresses both dimensions through two complementary mechanisms. First, \\emph{experience-guided reward shaping} tracks the shortest correct solution found so far for each problem "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on multiple mathematical reasoning benchmarks demonstrate that ExpThink reduces average response length by up to 77% while simultaneously improving accuracy, achieving up to 3× higher accuracy-efficiency ratio than the vanilla baseline.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the three-tier reward based on tracked shortest correct solutions and the correct-count normalization produce stable, generalizable compression without overfitting to the specific benchmarks or introducing hidden accuracy trade-offs not captured in the reported metrics.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ExpThink applies experience-tracked rewards and correct-count normalized advantages in RL to compress CoT reasoning, cutting length up to 77% while raising accuracy and efficiency ratio on math benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"ExpThink applies experience-guided rewards and adaptive normalization in reinforcement learning to shorten chain-of-thought reasoning by up to 77% while increasing accuracy.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f3a706344dfbb1618f4fd156909e1aa23033728c1672297686b7d01db60112c6"},"source":{"id":"2605.07501","kind":"arxiv","version":2},"verdict":{"id":"7e8e25ea-2cda-46b1-ad8a-7a4fc54b5ec4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-11T01:56:57.476130Z","strongest_claim":"Experiments on multiple mathematical reasoning benchmarks demonstrate that ExpThink reduces average response length by up to 77% while simultaneously improving accuracy, achieving up to 3× higher accuracy-efficiency ratio than the vanilla baseline.","one_line_summary":"ExpThink applies experience-tracked rewards and correct-count normalized advantages in RL to compress CoT reasoning, cutting length up to 77% while raising accuracy and efficiency ratio on math benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the three-tier reward based on tracked shortest correct solutions and the correct-count normalization produce stable, generalizable compression without overfitting to the specific benchmarks or introducing hidden accuracy trade-offs not captured in the reported metrics.","pith_extraction_headline":"ExpThink applies experience-guided rewards and adaptive normalization in reinforcement learning to shorten chain-of-thought reasoning by up to 77% while increasing accuracy."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.07501/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T16:31:18.802467Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T11:44:04.787706Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"81db4c1e01426e47573c5363a720833f812d6f722fdbf0b7de6e6ceef34d67ea"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4f2a54e790f93447898133237b0b21dcc04936f0ba35aead09ce7bf665b77eb7"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"7e8e25ea-2cda-46b1-ad8a-7a4fc54b5ec4"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FPb8KjbJUuiSqjppgb7NV0mNoCwQo6DOiIXwM+NvwHS6OdSAvnaRyBxlDFpgndjLBM2EPgtYncNdzyA6aEEpBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T18:20:58.711853Z"},"content_sha256":"26aa699439797bee7c00fa74cd9c36179450132c1ab20d556818dc773da35363","schema_version":"1.0","event_id":"sha256:26aa699439797bee7c00fa74cd9c36179450132c1ab20d556818dc773da35363"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC/bundle.json","state_url":"https://pith.science/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T18:20:58Z","links":{"resolver":"https://pith.science/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC","bundle":"https://pith.science/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC/bundle.json","state":"https://pith.science/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/W2QNV2A7XOV5DEKZPUCC4VXZLC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:W2QNV2A7XOV5DEKZPUCC4VXZLC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ed64316e54cb6d5e79165aced591829a0b149217ff7278987a7acaa20dd0c3b6","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T09:37:30Z","title_canon_sha256":"0a6b985093118ec25d6a33452d3085c000bfc20123b8a966171b6709100a01e6"},"schema_version":"1.0","source":{"id":"2605.07501","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.07501","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.07501v2","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.07501","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"W2QNV2A7XOV5","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"W2QNV2A7XOV5DEKZ","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"W2QNV2A7","created_at":"2026-05-20T00:03:14Z"}],"graph_snapshots":[{"event_id":"sha256:26aa699439797bee7c00fa74cd9c36179450132c1ab20d556818dc773da35363","target":"graph","created_at":"2026-05-20T00:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments on multiple mathematical reasoning benchmarks demonstrate that ExpThink reduces average response length by up to 77% while simultaneously improving accuracy, achieving up to 3× higher accuracy-efficiency ratio than the vanilla baseline."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the three-tier reward based on tracked shortest correct solutions and the correct-count normalization produce stable, generalizable compression without overfitting to the specific benchmarks or introducing hidden accuracy trade-offs not captured in the reported metrics."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ExpThink applies experience-tracked rewards and correct-count normalized advantages in RL to compress CoT reasoning, cutting length up to 77% while raising accuracy and efficiency ratio on math benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"ExpThink applies experience-guided rewards and adaptive normalization in reinforcement learning to shorten chain-of-thought reasoning by up to 77% while increasing accuracy."}],"snapshot_sha256":"f3a706344dfbb1618f4fd156909e1aa23033728c1672297686b7d01db60112c6"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4f2a54e790f93447898133237b0b21dcc04936f0ba35aead09ce7bf665b77eb7"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T16:31:18.802467Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T11:44:04.787706Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.07501/integrity.json","findings":[],"snapshot_sha256":"81db4c1e01426e47573c5363a720833f812d6f722fdbf0b7de6e6ceef34d67ea","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large reasoning models (LRMs) achieve strong performance via extended chain-of-thought (CoT) reasoning, yet suffer from excessive token consumption and high inference latency. Existing reinforcement learning (RL) approaches for CoT compression rely on uniform, static length penalties that neglect model capability dynamics and problem-level difficulty variation. We propose \\textbf{ExpThink}\\xspace, an RL framework that addresses both dimensions through two complementary mechanisms. First, \\emph{experience-guided reward shaping} tracks the shortest correct solution found so far for each problem ","authors_text":"Haiwei Wang, Jinchang Luo, Jing Jin, Miaohui Wang, MingQuan Cheng, Tingcheng Bian, Wenyuan Jiang, Yuzhe Zhang","cross_cats":["cs.CL"],"headline":"ExpThink applies experience-guided rewards and adaptive normalization in reinforcement learning to shorten chain-of-thought reasoning by up to 77% while increasing accuracy.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T09:37:30Z","title":"ExpThink: Experience-Guided Reinforcement Learning for Adaptive Chain-of-Thought Compression"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.07501","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-11T01:56:57.476130Z","id":"7e8e25ea-2cda-46b1-ad8a-7a4fc54b5ec4","model_set":{"reader":"grok-4.3"},"one_line_summary":"ExpThink applies experience-tracked rewards and correct-count normalized advantages in RL to compress CoT reasoning, cutting length up to 77% while raising accuracy and efficiency ratio on math benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"ExpThink applies experience-guided rewards and adaptive normalization in reinforcement learning to shorten chain-of-thought reasoning by up to 77% while increasing accuracy.","strongest_claim":"Experiments on multiple mathematical reasoning benchmarks demonstrate that ExpThink reduces average response length by up to 77% while simultaneously improving accuracy, achieving up to 3× higher accuracy-efficiency ratio than the vanilla baseline.","weakest_assumption":"That the three-tier reward based on tracked shortest correct solutions and the correct-count normalization produce stable, generalizable compression without overfitting to the specific benchmarks or introducing hidden accuracy trade-offs not captured in the reported metrics."}},"verdict_id":"7e8e25ea-2cda-46b1-ad8a-7a4fc54b5ec4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:256fb2b47d2f2b4c2145370f398ed8d99f829f6b63feab259dc817ec20ade074","target":"record","created_at":"2026-05-20T00:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ed64316e54cb6d5e79165aced591829a0b149217ff7278987a7acaa20dd0c3b6","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T09:37:30Z","title_canon_sha256":"0a6b985093118ec25d6a33452d3085c000bfc20123b8a966171b6709100a01e6"},"schema_version":"1.0","source":{"id":"2605.07501","kind":"arxiv","version":2}},"canonical_sha256":"b6a0dae81fbbabd191597d042e56f95881bcdc5954832c628af53ede889ab99d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b6a0dae81fbbabd191597d042e56f95881bcdc5954832c628af53ede889ab99d","first_computed_at":"2026-05-20T00:03:14.956712Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:14.956712Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bp++u7haAustiYe5kysvaxnmrCxJfGBThHiWeQJJ8uuBRAm2mWT075PyLqfrlC9peFtzyJKE+pZbYEEdOTRBCg==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:14.957566Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.07501","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:256fb2b47d2f2b4c2145370f398ed8d99f829f6b63feab259dc817ec20ade074","sha256:26aa699439797bee7c00fa74cd9c36179450132c1ab20d556818dc773da35363"],"state_sha256":"29c6a164bcafa3052b10c80664730a4e0971b556756bf514c1cef2a5bec7e8a8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"d6xC1tp+7EMdHTn57jkCqlKgZnYylhfyRvEAPrahftBd0YNpjVJFECFUvlbUAYXN89ef+1hGwTutzYeWX7yfBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T18:20:58.715399Z","bundle_sha256":"c58b8ed14c20fc7ce2719fc3ceb2a99be232d742714a9e2244441707cb51fbb8"}}