{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:6CAYQMLC2AWU7XRFPLFHJHAFOB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e47065b2b3899f9d1ef26913dd54940e0ce0168bd5ddce746b04c5375f12abf1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T15:05:30Z","title_canon_sha256":"d6b1a3571dcfc254d2acd344a58689ed6aa89569657105e1ce4e6d52fdca7abc"},"schema_version":"1.0","source":{"id":"2605.13643","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13643","created_at":"2026-05-18T02:44:17Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13643v1","created_at":"2026-05-18T02:44:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13643","created_at":"2026-05-18T02:44:17Z"},{"alias_kind":"pith_short_12","alias_value":"6CAYQMLC2AWU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"6CAYQMLC2AWU7XRF","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"6CAYQMLC","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:cac0e82b4fb9e4bcf7cf3659376bcc488bb5114eec16455d7c1943e0c1dcbd58","target":"graph","created_at":"2026-05-18T02:44:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"supervision should concentrate on trajectory regions where the teacher's feedback remains discriminative, rather than uniformly covering the entire response. We operationalize this principle through a trajectory-specific release rule... Experimental results... indicate that this release rule consistently outperforms standard full-trajectory OPD across five in-domain benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the BIC-style downward change point on NLTK-sentence-aggregated teacher margins over the student's top-K set reliably identifies the onset of local teachability collapse without prematurely cutting useful supervision or retaining non-discriminative tokens."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Local teachability collapse in trajectory suffixes makes uniform dense supervision suboptimal in strong-to-weak OPD; truncating at BIC-style change points on teacher margin improves performance."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"In strong-to-weak on-policy distillation, truncating supervision at the onset of local teachability collapse outperforms full-trajectory training."}],"snapshot_sha256":"3e34d5d3f186b02588155a8d7cc0e00a5a308d2589f07aee23e2ac64944b8fc0"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"On-policy distillation (OPD) trains a student model on its own rollouts using dense feedback from a stronger teacher. Prior literature suggests that, provided teacher feedback is available, supervising the full sequence of response tokens should monotonically improve performance. However, we demonstrate that this assumption sometimes fails to hold in strong-to-weak OPD settings. While later segments of a generated trajectory may still exhibit a non-zero teacher-student advantage, they frequently lack the local contrast that makes dense feedback effective for prioritizing student learning. We t","authors_text":"Bing Wang, Jieping Ye, Kaiyuan Liu, Rongxiang Weng, Yang Bai, Ziyuan Zhuang","cross_cats":[],"headline":"In strong-to-weak on-policy distillation, truncating supervision at the onset of local teachability collapse outperforms full-trajectory training.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T15:05:30Z","title":"Prefix Teach, Suffix Fade: Local Teachability Collapse in Strong-to-Weak On-Policy Distillation"},"references":{"count":51,"internal_anchors":18,"resolved_work":51,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"On-policy distillation of language models: Learning from self-generated mistakes","work_id":"3733ff2d-cd95-4776-9fa9-1b2328326749","year":2024},{"cited_arxiv_id":"2108.07732","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Online difficulty filtering for reasoning oriented reinforcement learning","work_id":"6d0fd622-6a60-49c6-9da6-05e40bb88d27","year":2026},{"cited_arxiv_id":"2505.23281","doi":"","is_internal_anchor":true,"ref_index":4,"title":"MathArena: Evaluating LLMs on Uncontaminated Math Competitions","work_id":"61e8d872-ccc7-46b8-8ec1-94008704c941","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Steven Bird, Ewan Klein, and Edward Loper.Natural language processing with Python: analyzing text with the natural language toolkit. \" O’Reilly Media, Inc.\", 2009","work_id":"d0edf312-ba06-4052-bb7d-82db22200b26","year":2009}],"snapshot_sha256":"bb27542122f32d4a0f43d2e5740804e2fccf8abd8f9a5dbc08eb87f746c80999"},"source":{"id":"2605.13643","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T19:50:03.308971Z","id":"dcb99f9f-fd20-45e4-a9f2-1f7da77757a6","model_set":{"reader":"grok-4.3"},"one_line_summary":"Local teachability collapse in trajectory suffixes makes uniform dense supervision suboptimal in strong-to-weak OPD; truncating at BIC-style change points on teacher margin improves performance.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"In strong-to-weak on-policy distillation, truncating supervision at the onset of local teachability collapse outperforms full-trajectory training.","strongest_claim":"supervision should concentrate on trajectory regions where the teacher's feedback remains discriminative, rather than uniformly covering the entire response. We operationalize this principle through a trajectory-specific release rule... Experimental results... indicate that this release rule consistently outperforms standard full-trajectory OPD across five in-domain benchmarks.","weakest_assumption":"That the BIC-style downward change point on NLTK-sentence-aggregated teacher margins over the student's top-K set reliably identifies the onset of local teachability collapse without prematurely cutting useful supervision or retaining non-discriminative tokens."}},"verdict_id":"dcb99f9f-fd20-45e4-a9f2-1f7da77757a6"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4f5cf60981924b481dec48b0663b694260e9c7fb96089f9e6c0b50167158ff67","target":"record","created_at":"2026-05-18T02:44:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e47065b2b3899f9d1ef26913dd54940e0ce0168bd5ddce746b04c5375f12abf1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T15:05:30Z","title_canon_sha256":"d6b1a3571dcfc254d2acd344a58689ed6aa89569657105e1ce4e6d52fdca7abc"},"schema_version":"1.0","source":{"id":"2605.13643","kind":"arxiv","version":1}},"canonical_sha256":"f081883162d02d4fde257aca749c05706f026011bdbc4f358c0417fa8f5b7cd7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f081883162d02d4fde257aca749c05706f026011bdbc4f358c0417fa8f5b7cd7","first_computed_at":"2026-05-18T02:44:17.571911Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:17.571911Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2x+O50ONytqyP3SHod7Kmeq1Y7B1pHljU+278PO5UiDPW/pkdp3SJbDNvoF4kKiCOPEJOnTk5BAoYSwREcXuDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:17.572576Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13643","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4f5cf60981924b481dec48b0663b694260e9c7fb96089f9e6c0b50167158ff67","sha256:cac0e82b4fb9e4bcf7cf3659376bcc488bb5114eec16455d7c1943e0c1dcbd58"],"state_sha256":"aa4c0d05627041f96b5bb7467a20519a3cfffeaa81c2414fd51e9033c14bc7f5"}