{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:R3LFMUG6RCIIDOQFXUQFT2SNV3","short_pith_number":"pith:R3LFMUG6","canonical_record":{"source":{"id":"2606.21994","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-20T11:18:34Z","cross_cats_sorted":[],"title_canon_sha256":"49e81f3e602aa89cf26463cf02222e6cce91da3f50f462888b132607ba87b951","abstract_canon_sha256":"5042bb4d6a62e40c4ee13731c3a67a056f7282c50e8353bb705e070191c026b3"},"schema_version":"1.0"},"canonical_sha256":"8ed65650de889081ba05bd2059ea4daecadffbfce13731f9104adbb9adcf01b8","source":{"kind":"arxiv","id":"2606.21994","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21994","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21994v1","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21994","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"pith_short_12","alias_value":"R3LFMUG6RCII","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"pith_short_16","alias_value":"R3LFMUG6RCIIDOQF","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"pith_short_8","alias_value":"R3LFMUG6","created_at":"2026-06-23T02:13:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:R3LFMUG6RCIIDOQFXUQFT2SNV3","target":"record","payload":{"canonical_record":{"source":{"id":"2606.21994","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-20T11:18:34Z","cross_cats_sorted":[],"title_canon_sha256":"49e81f3e602aa89cf26463cf02222e6cce91da3f50f462888b132607ba87b951","abstract_canon_sha256":"5042bb4d6a62e40c4ee13731c3a67a056f7282c50e8353bb705e070191c026b3"},"schema_version":"1.0"},"canonical_sha256":"8ed65650de889081ba05bd2059ea4daecadffbfce13731f9104adbb9adcf01b8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:13:05.131532Z","signature_b64":"IKbNk0NLC0/k64kdMTzpMQsERHzwooIv8bOZ6PnU8kvapnc5QVzPiwWSOYkntyERyaP81A6znQGZ6qbdTJOtBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8ed65650de889081ba05bd2059ea4daecadffbfce13731f9104adbb9adcf01b8","last_reissued_at":"2026-06-23T02:13:05.131185Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:13:05.131185Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.21994","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T02:13:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8Rz8UMJRqhktAYPZBD3NHSix0mTtV9JMnuA8pqGkkUvkziXEIt6cvscqVg8DGYOGSydjbpN2qLIPJUDiWHMYAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T04:34:56.216658Z"},"content_sha256":"32306f466f2655bfaf3ca29c3de297f189cfc396ac9a02891b49893f22ade81d","schema_version":"1.0","event_id":"sha256:32306f466f2655bfaf3ca29c3de297f189cfc396ac9a02891b49893f22ade81d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:R3LFMUG6RCIIDOQFXUQFT2SNV3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Prefix-Guided On-Policy Distillation: Mining Golden Trajectories from Rollouts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Huan Song, Jiawei Shao, Qingfei Zhao, Shuyu Tian, Xuelong Li","submitted_at":"2026-06-20T11:18:34Z","abstract_excerpt":"On-policy distillation (OPD) improves reasoning models by applying dense teacher supervision on student-sampled trajectories. However, scaling OPD to long-horizon mathematical reasoning exposes a reliability and efficiency problem: standard OPD assigns every sampled candidate the same long rollout budget, even though some trajectories may quickly become weakly aligned with the teacher and provide less useful supervision. Prior analyses suggest that successful OPD depends on local teacher-student compatibility, which can be measured by top-k overlap on student-visited prefixes. When this overla"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21994","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21994/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T02:13:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C44H/jAu2Q7VBZhxSk4gAk1eR3fPoGIFbJgv01ydzDS/ZYP88jIzDHRuNkhyLWkBmcqZVpptqnbK3utLhL9bCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T04:34:56.217013Z"},"content_sha256":"1b135e9d4a6419de08aaa3b7dcfa7e59320543acc267fadeffeb77eff0d171f9","schema_version":"1.0","event_id":"sha256:1b135e9d4a6419de08aaa3b7dcfa7e59320543acc267fadeffeb77eff0d171f9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3/bundle.json","state_url":"https://pith.science/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-28T04:34:56Z","links":{"resolver":"https://pith.science/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3","bundle":"https://pith.science/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3/bundle.json","state":"https://pith.science/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R3LFMUG6RCIIDOQFXUQFT2SNV3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:R3LFMUG6RCIIDOQFXUQFT2SNV3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5042bb4d6a62e40c4ee13731c3a67a056f7282c50e8353bb705e070191c026b3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-20T11:18:34Z","title_canon_sha256":"49e81f3e602aa89cf26463cf02222e6cce91da3f50f462888b132607ba87b951"},"schema_version":"1.0","source":{"id":"2606.21994","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21994","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21994v1","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21994","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"pith_short_12","alias_value":"R3LFMUG6RCII","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"pith_short_16","alias_value":"R3LFMUG6RCIIDOQF","created_at":"2026-06-23T02:13:05Z"},{"alias_kind":"pith_short_8","alias_value":"R3LFMUG6","created_at":"2026-06-23T02:13:05Z"}],"graph_snapshots":[{"event_id":"sha256:1b135e9d4a6419de08aaa3b7dcfa7e59320543acc267fadeffeb77eff0d171f9","target":"graph","created_at":"2026-06-23T02:13:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.21994/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"On-policy distillation (OPD) improves reasoning models by applying dense teacher supervision on student-sampled trajectories. However, scaling OPD to long-horizon mathematical reasoning exposes a reliability and efficiency problem: standard OPD assigns every sampled candidate the same long rollout budget, even though some trajectories may quickly become weakly aligned with the teacher and provide less useful supervision. Prior analyses suggest that successful OPD depends on local teacher-student compatibility, which can be measured by top-k overlap on student-visited prefixes. When this overla","authors_text":"Huan Song, Jiawei Shao, Qingfei Zhao, Shuyu Tian, Xuelong Li","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-20T11:18:34Z","title":"Prefix-Guided On-Policy Distillation: Mining Golden Trajectories from Rollouts"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21994","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:32306f466f2655bfaf3ca29c3de297f189cfc396ac9a02891b49893f22ade81d","target":"record","created_at":"2026-06-23T02:13:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5042bb4d6a62e40c4ee13731c3a67a056f7282c50e8353bb705e070191c026b3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-20T11:18:34Z","title_canon_sha256":"49e81f3e602aa89cf26463cf02222e6cce91da3f50f462888b132607ba87b951"},"schema_version":"1.0","source":{"id":"2606.21994","kind":"arxiv","version":1}},"canonical_sha256":"8ed65650de889081ba05bd2059ea4daecadffbfce13731f9104adbb9adcf01b8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8ed65650de889081ba05bd2059ea4daecadffbfce13731f9104adbb9adcf01b8","first_computed_at":"2026-06-23T02:13:05.131185Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T02:13:05.131185Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IKbNk0NLC0/k64kdMTzpMQsERHzwooIv8bOZ6PnU8kvapnc5QVzPiwWSOYkntyERyaP81A6znQGZ6qbdTJOtBA==","signature_status":"signed_v1","signed_at":"2026-06-23T02:13:05.131532Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.21994","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:32306f466f2655bfaf3ca29c3de297f189cfc396ac9a02891b49893f22ade81d","sha256:1b135e9d4a6419de08aaa3b7dcfa7e59320543acc267fadeffeb77eff0d171f9"],"state_sha256":"ceed8b0560ebfaeca9f9eddf409f59f490125d8ef37d89ea26ac5d17e9600199"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"E0k5VfD4yWCw0IHs+wukBwn7rO7Pw7mcYHfOfrn0xNHwLx2ieIp3MCkfzNlxIoa62ZiNCuGCm5HH2F3tcCUrDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-28T04:34:56.218936Z","bundle_sha256":"88c525a405bb68d101c65cbe477083a115f2ea7fc7f2b9c250aa52d4329dc0e0"}}