{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3ZAK63EJXKY5D2UZPLCZM3PUMN","merge_version":"pith-open-graph-merge-v1","event_count":3,"valid_event_count":3,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"00baede65b540590aefdf9d8125b22901bb17af42eb49249664b3b492e4e4f1d","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-13T09:38:20Z","title_canon_sha256":"3fd4635281c5f14f852c07633698fc5e65cf60e63605afa04e829ceeb7619bef"},"schema_version":"1.0","source":{"id":"2605.13255","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13255","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13255v1","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13255","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"pith_short_12","alias_value":"3ZAK63EJXKY5","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"3ZAK63EJXKY5D2UZ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"3ZAK63EJ","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:cbfba81a2e02441d0c15a64db191dd2fde5257b3fe072f5328b458e56913b820","target":"graph","created_at":"2026-05-18T02:44:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments with Qwen3-4B and Qwen3-8B in thinking mode show that EGRSD and CL-EGRSD advance the accuracy-length frontier among the compared trainable methods."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That selectively down-weighting high-entropy tokens via the teacher-entropy confidence gate improves net reasoning quality without discarding critical information that only appears in uncertain positions."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"EGRSD and CL-EGRSD advance the accuracy-length frontier in LLM reasoning by entropy-guided weighting of token-level distillation signals from the teacher."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"An entropy confidence gate that down-weights uncertain tokens improves the accuracy-length trade-off in on-policy self-distillation for LLM reasoning."}],"snapshot_sha256":"a9dc58b3043a88cb564b735afd95a5ea212f26c7f48f043b575c2a4bccf23389"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"On-policy self-distillation trains a reasoning model on its own rollouts while a teacher, often the same model conditioned on privileged context, provides dense token-level supervision. Existing objectives typically weight the teacher's token-level signal uniformly across a chain-of-thought sequence, despite substantial variation in the entropy of the teacher's predictive distribution. We propose EGRSD (Entropy-Guided Reinforced Self-Distillation), which unifies token-level updates through three signals: a reward-grounded direction, a teacher-student likelihood-ratio magnitude, and the propose","authors_text":"Conghui He, Junlong Ke, Linfeng Zhang, Weijia Li, Zichen Wen","cross_cats":[],"headline":"An entropy confidence gate that down-weights uncertain tokens improves the accuracy-length trade-off in on-policy self-distillation for LLM reasoning.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-13T09:38:20Z","title":"Respecting Self-Uncertainty in On-Policy Self-Distillation for Efficient LLM Reasoning"},"references":{"count":27,"internal_anchors":10,"resolved_work":27,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"arXiv preprint arXiv:2505.16400 , year=","work_id":"428ad314-c120-41da-9db7-b8bc1918fffb","year":null},{"cited_arxiv_id":"2110.14168","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","year":null},{"cited_arxiv_id":"2506.04178","doi":"","is_internal_anchor":true,"ref_index":3,"title":"OpenThoughts: Data Recipes for Reasoning Models","work_id":"c7acbe41-27a0-4773-a7be-8f08d86cdf21","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Entropy-aware on-policy distillation of language models","work_id":"7dccbe12-e2aa-48d8-9b76-5521ccf02668","year":null},{"cited_arxiv_id":"2603.24472","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Why Does Self-Distillation (Sometimes) Degrade the Reasoning Capability of LLMs?","work_id":"8df6a2d1-d890-48ae-af85-c11643a91097","year":null}],"snapshot_sha256":"359ab656fd170e83e723876231863bf36e6850496e2ccddb02fb6eec773933be"},"source":{"id":"2605.13255","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T19:47:16.821590Z","id":"d9dc5cd5-62d7-400d-8e20-7d6319de3e6c","model_set":{"reader":"grok-4.3"},"one_line_summary":"EGRSD and CL-EGRSD advance the accuracy-length frontier in LLM reasoning by entropy-guided weighting of token-level distillation signals from the teacher.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"An entropy confidence gate that down-weights uncertain tokens improves the accuracy-length trade-off in on-policy self-distillation for LLM reasoning.","strongest_claim":"Experiments with Qwen3-4B and Qwen3-8B in thinking mode show that EGRSD and CL-EGRSD advance the accuracy-length frontier among the compared trainable methods.","weakest_assumption":"That selectively down-weighting high-entropy tokens via the teacher-entropy confidence gate improves net reasoning quality without discarding critical information that only appears in uncertain positions."}},"verdict_id":"d9dc5cd5-62d7-400d-8e20-7d6319de3e6c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f69cd0d5de39894a0b78e630d7fdbed87f01c1042e54598b2da46a752fee16e3","target":"record","created_at":"2026-05-18T02:44:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"00baede65b540590aefdf9d8125b22901bb17af42eb49249664b3b492e4e4f1d","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-13T09:38:20Z","title_canon_sha256":"3fd4635281c5f14f852c07633698fc5e65cf60e63605afa04e829ceeb7619bef"},"schema_version":"1.0","source":{"id":"2605.13255","kind":"arxiv","version":1}},"canonical_sha256":"de40af6c89bab1d1ea997ac5966df46358ad9897dcee5fdb409fc775af3e9699","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"de40af6c89bab1d1ea997ac5966df46358ad9897dcee5fdb409fc775af3e9699","first_computed_at":"2026-05-18T02:44:49.396514Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:49.396514Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3kAVrKlhHjS/Pmic2Pujhsol6UMsaoWIsGMma1TriBp+/7Q/VeD52i3tiD3/jZpV0R7dRdyb2VxHfawX+ci/Ag==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:49.396999Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13255","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f69cd0d5de39894a0b78e630d7fdbed87f01c1042e54598b2da46a752fee16e3","sha256:cbfba81a2e02441d0c15a64db191dd2fde5257b3fe072f5328b458e56913b820","sha256:00be9e31c82da67aea40133c7652f28fea4c5eda27f3ff1deb3d79414b8050ae"],"state_sha256":"306d6482e9572b5e957707442f1a8adf2ea540f59ab81cc85fe22f4321f7fcb0"}