{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:WRLLWTUI5TJGFIL76XM6LRIWOD","short_pith_number":"pith:WRLLWTUI","canonical_record":{"source":{"id":"2606.07082","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-05T09:20:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"fbd7efa65fd43416dc414e07bb24fe27397213cd8b9e95487094e709147a0068","abstract_canon_sha256":"70d65a4bf7c3ec7f9a5de1e5bc6745f14d6cfdd0d32aee5095052e76da8a82e7"},"schema_version":"1.0"},"canonical_sha256":"b456bb4e88ecd262a17ff5d9e5c51670d16858688f5ee2dcb3ef72984368f355","source":{"kind":"arxiv","id":"2606.07082","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07082","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07082v1","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07082","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"pith_short_12","alias_value":"WRLLWTUI5TJG","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"pith_short_16","alias_value":"WRLLWTUI5TJGFIL7","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"pith_short_8","alias_value":"WRLLWTUI","created_at":"2026-06-08T01:04:45Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:WRLLWTUI5TJGFIL76XM6LRIWOD","target":"record","payload":{"canonical_record":{"source":{"id":"2606.07082","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-05T09:20:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"fbd7efa65fd43416dc414e07bb24fe27397213cd8b9e95487094e709147a0068","abstract_canon_sha256":"70d65a4bf7c3ec7f9a5de1e5bc6745f14d6cfdd0d32aee5095052e76da8a82e7"},"schema_version":"1.0"},"canonical_sha256":"b456bb4e88ecd262a17ff5d9e5c51670d16858688f5ee2dcb3ef72984368f355","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:04:45.203605Z","signature_b64":"6yrlj5KDGIAVb+5DsWo9OLgTOu9xui6brsA94Nr1sQWRzDgY/CZwNgQEwtvA3mKeWI2VSTowfUDLjvg3zXK4Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b456bb4e88ecd262a17ff5d9e5c51670d16858688f5ee2dcb3ef72984368f355","last_reissued_at":"2026-06-08T01:04:45.202767Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:04:45.202767Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.07082","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:04:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o9V/kMwMuBMyBqAvTwRvTexYBXFNXQ6OSNqPDrx0lvIzgcs96mWWGNHgu0/y5ZHgmFLY685BZlb9zGFLS2B2DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T00:05:17.801816Z"},"content_sha256":"6ea73cbd05ac450c3e1b99bde710c7baa1de827e21b86b43184e7c03210086d2","schema_version":"1.0","event_id":"sha256:6ea73cbd05ac450c3e1b99bde710c7baa1de827e21b86b43184e7c03210086d2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:WRLLWTUI5TJGFIL76XM6LRIWOD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"On the Geometry of On-Policy Distillation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chak Tou Leong, Qingyu Yin, Rongduo Han, Sunbowen Lee, Yanshu Li, Yanxu Chen, Yi R. Fung, Zhennan Shen, Zhilin Wang","submitted_at":"2026-06-05T09:20:15Z","abstract_excerpt":"On-policy distillation (OPD) is increasingly used to improve large language model reasoning, but its training dynamics remain poorly understood. We characterize the trajectory of OPD updates in parameter space and compare it with supervised fine-tuning (SFT) and reinforcement learning with verifiable rewards (RLVR). A suite of parameter-space diagnostics consistently places OPD in a relaxed off-principal regime: compared with SFT, its updates affect fewer weights and avoid principal directions more strongly, while compared with RLVR, they remain less tightly constrained. Beyond this static loc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07082","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.07082/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:04:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VPfOHZYuVTcwoVfjEpqZ7pVLM4Oaod1wZosiul+uDnpZqeer9lWoNlUDg+9ohGDmz5CQSFbND2e6gAUNRK5iBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T00:05:17.802616Z"},"content_sha256":"7991cd930a849df23fe948f5f2fa2636db11c699f2347616a2e7381f1635d89d","schema_version":"1.0","event_id":"sha256:7991cd930a849df23fe948f5f2fa2636db11c699f2347616a2e7381f1635d89d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WRLLWTUI5TJGFIL76XM6LRIWOD/bundle.json","state_url":"https://pith.science/pith/WRLLWTUI5TJGFIL76XM6LRIWOD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WRLLWTUI5TJGFIL76XM6LRIWOD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T00:05:17Z","links":{"resolver":"https://pith.science/pith/WRLLWTUI5TJGFIL76XM6LRIWOD","bundle":"https://pith.science/pith/WRLLWTUI5TJGFIL76XM6LRIWOD/bundle.json","state":"https://pith.science/pith/WRLLWTUI5TJGFIL76XM6LRIWOD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WRLLWTUI5TJGFIL76XM6LRIWOD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:WRLLWTUI5TJGFIL76XM6LRIWOD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"70d65a4bf7c3ec7f9a5de1e5bc6745f14d6cfdd0d32aee5095052e76da8a82e7","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-05T09:20:15Z","title_canon_sha256":"fbd7efa65fd43416dc414e07bb24fe27397213cd8b9e95487094e709147a0068"},"schema_version":"1.0","source":{"id":"2606.07082","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07082","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07082v1","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07082","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"pith_short_12","alias_value":"WRLLWTUI5TJG","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"pith_short_16","alias_value":"WRLLWTUI5TJGFIL7","created_at":"2026-06-08T01:04:45Z"},{"alias_kind":"pith_short_8","alias_value":"WRLLWTUI","created_at":"2026-06-08T01:04:45Z"}],"graph_snapshots":[{"event_id":"sha256:7991cd930a849df23fe948f5f2fa2636db11c699f2347616a2e7381f1635d89d","target":"graph","created_at":"2026-06-08T01:04:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07082/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"On-policy distillation (OPD) is increasingly used to improve large language model reasoning, but its training dynamics remain poorly understood. We characterize the trajectory of OPD updates in parameter space and compare it with supervised fine-tuning (SFT) and reinforcement learning with verifiable rewards (RLVR). A suite of parameter-space diagnostics consistently places OPD in a relaxed off-principal regime: compared with SFT, its updates affect fewer weights and avoid principal directions more strongly, while compared with RLVR, they remain less tightly constrained. Beyond this static loc","authors_text":"Chak Tou Leong, Qingyu Yin, Rongduo Han, Sunbowen Lee, Yanshu Li, Yanxu Chen, Yi R. Fung, Zhennan Shen, Zhilin Wang","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-05T09:20:15Z","title":"On the Geometry of On-Policy Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07082","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6ea73cbd05ac450c3e1b99bde710c7baa1de827e21b86b43184e7c03210086d2","target":"record","created_at":"2026-06-08T01:04:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"70d65a4bf7c3ec7f9a5de1e5bc6745f14d6cfdd0d32aee5095052e76da8a82e7","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-05T09:20:15Z","title_canon_sha256":"fbd7efa65fd43416dc414e07bb24fe27397213cd8b9e95487094e709147a0068"},"schema_version":"1.0","source":{"id":"2606.07082","kind":"arxiv","version":1}},"canonical_sha256":"b456bb4e88ecd262a17ff5d9e5c51670d16858688f5ee2dcb3ef72984368f355","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b456bb4e88ecd262a17ff5d9e5c51670d16858688f5ee2dcb3ef72984368f355","first_computed_at":"2026-06-08T01:04:45.202767Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:04:45.202767Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6yrlj5KDGIAVb+5DsWo9OLgTOu9xui6brsA94Nr1sQWRzDgY/CZwNgQEwtvA3mKeWI2VSTowfUDLjvg3zXK4Bg==","signature_status":"signed_v1","signed_at":"2026-06-08T01:04:45.203605Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07082","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6ea73cbd05ac450c3e1b99bde710c7baa1de827e21b86b43184e7c03210086d2","sha256:7991cd930a849df23fe948f5f2fa2636db11c699f2347616a2e7381f1635d89d"],"state_sha256":"bd7225723e1deeb769833443d090b151d5827934c36e334069d405ed131c9f18"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"h22kTXCzoMRwxZmhzgetah9b4kkylmvM5nTmcGkzPyFUuNzTksu1MvZOKGD3dsfd8H5sGJXnYvu+GIvjw4SzCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T00:05:17.807017Z","bundle_sha256":"bfb6138ecfec22cdb408f01eb75018e45e3d7b974288f5ab3dc7815b94354ed7"}}