{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EUSI362Q3K4HA3UAPBJJGQHS7Z","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"aea3160d6b8adc9c0dc677640a99686eb1f1398f687152badf7220ddcbd9cce2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","title_canon_sha256":"57a214cfa9bbd9c7d86b2f841addbbcd13d252a55461bbf51c044f7ef6e03c41"},"schema_version":"1.0","source":{"id":"2606.26790","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26790","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26790v1","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26790","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_12","alias_value":"EUSI362Q3K4H","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_16","alias_value":"EUSI362Q3K4HA3UA","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_8","alias_value":"EUSI362Q","created_at":"2026-06-26T01:15:59Z"}],"graph_snapshots":[{"event_id":"sha256:692b9972358cbcca2ac163243a38734cb13a86a6107533369c60742660ca20eb","target":"graph","created_at":"2026-06-26T01:15:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.26790/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Outcome-based reinforcement learning provides a stable optimization backbone for language agents, but its sparse trajectory-level rewards provide little guidance on which intermediate decisions should be reinforced or suppressed. On-policy self-distillation offers dense token-level supervision, yet existing skill-conditioned variants often rely on external skill memories or retrieved privileged context, which are costly to maintain and can be mismatched with the state distribution induced by the current policy in multi-turn interaction. We propose \\textbf{OPID} (\\textbf{O}n-\\textbf{P}olicy Sk\\","authors_text":"Fan Zhang, Haoran Luo, Jianhua Tao, Jinyang Wu, Lang Feng, Shuai Zhang, Shuo Yang, Yuhao Shen, Zheng Lian, Zhengqi Wen, Zhengxi Lu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","title":"OPID: On-Policy Skill Distillation for Agentic Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26790","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b2837d5ea331f03026dc6ff44121441b6cfea2dcf16bab6e589fc0c275852fd2","target":"record","created_at":"2026-06-26T01:15:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"aea3160d6b8adc9c0dc677640a99686eb1f1398f687152badf7220ddcbd9cce2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","title_canon_sha256":"57a214cfa9bbd9c7d86b2f841addbbcd13d252a55461bbf51c044f7ef6e03c41"},"schema_version":"1.0","source":{"id":"2606.26790","kind":"arxiv","version":1}},"canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","first_computed_at":"2026-06-26T01:15:59.780381Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T01:15:59.780381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"W7iXAZ9itdv5lBh4vFjCrkOAOmNUTEe9Ee0m9qzKimgy/OLOU0h401xuaM4fUUqwaPBpORpfbHAr1+qn1EEkCg==","signature_status":"signed_v1","signed_at":"2026-06-26T01:15:59.780766Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.26790","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b2837d5ea331f03026dc6ff44121441b6cfea2dcf16bab6e589fc0c275852fd2","sha256:692b9972358cbcca2ac163243a38734cb13a86a6107533369c60742660ca20eb"],"state_sha256":"889e78062f7d80eca1a50446c8f12a948e51be831db04d84e15f93badef8cd6c"}