{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XW454VBUN4D277CKJVYRDSJGKK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ba6d65570414866ecd4fee1adb027638526a2647a6c41106d722fc5f23e6c3d8","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T17:58:22Z","title_canon_sha256":"9f7596f9cd7afff04a3b20065afa1b4a6b941b2b57c309db0e868f5a44cd0cbf"},"schema_version":"1.0","source":{"id":"2606.02684","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.02684","created_at":"2026-06-03T00:05:06Z"},{"alias_kind":"arxiv_version","alias_value":"2606.02684v1","created_at":"2026-06-03T00:05:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02684","created_at":"2026-06-03T00:05:06Z"},{"alias_kind":"pith_short_12","alias_value":"XW454VBUN4D2","created_at":"2026-06-03T00:05:06Z"},{"alias_kind":"pith_short_16","alias_value":"XW454VBUN4D277CK","created_at":"2026-06-03T00:05:06Z"},{"alias_kind":"pith_short_8","alias_value":"XW454VBU","created_at":"2026-06-03T00:05:06Z"}],"graph_snapshots":[{"event_id":"sha256:ba649011b6505f957ebdf6804d482aa024e9a668e8b13ed550e18e8ef3c344a3","target":"graph","created_at":"2026-06-03T00:05:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.02684/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"On-Policy distillation (OPD) in large language models is shifting from full-trace KL supervision toward more selective training paradigms. Recent OPD methods increasingly focus on selecting which trajectories to learn from, which tokens are most informative, and which supervision signals are most reliable. Motivated by this trend, we rethink optimization granularity of OPD and propose \\fireicon\\ FiRe-OPD (Filter, then Reweight), which jointly adjusts supervision signals at both trajectory and token levels. In details, FiRe-OPD first filters trajectories to remove low-quality rollout samples, a","authors_text":"Huangjie Yuan, Jing Jin, Leqi Zheng, Tao Feng, Wenrui Zhou, Xing Hu, Xuchang Zhong, Yongzi Yu, Yuying Li","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T17:58:22Z","title":"Filter, Then Reweight: Rethinking Optimization Granularity in On-Policy Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02684","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:705feb172f04306f0121255b36661d47cb739d20a234e6240e346419fc4f3155","target":"record","created_at":"2026-06-03T00:05:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ba6d65570414866ecd4fee1adb027638526a2647a6c41106d722fc5f23e6c3d8","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T17:58:22Z","title_canon_sha256":"9f7596f9cd7afff04a3b20065afa1b4a6b941b2b57c309db0e868f5a44cd0cbf"},"schema_version":"1.0","source":{"id":"2606.02684","kind":"arxiv","version":1}},"canonical_sha256":"bdb9de54346f07affc4a4d7111c92652bf4050cc258e42b71afc0d7572e676a5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bdb9de54346f07affc4a4d7111c92652bf4050cc258e42b71afc0d7572e676a5","first_computed_at":"2026-06-03T00:05:06.361079Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T00:05:06.361079Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FXmkDiAhxOzMMr7MZqFxrTRikSjnmN6w+RqkphmuiK7og1tbj9SDWGmEfEzCnK/HsLJOPaJkSCeuaeYlOM3xBw==","signature_status":"signed_v1","signed_at":"2026-06-03T00:05:06.361458Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.02684","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:705feb172f04306f0121255b36661d47cb739d20a234e6240e346419fc4f3155","sha256:ba649011b6505f957ebdf6804d482aa024e9a668e8b13ed550e18e8ef3c344a3"],"state_sha256":"b2f0b05c6657058520493559ad71da42e22c1d1b328b8bfdd09eb8a77f252809"}