{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:EUSI362Q3K4HA3UAPBJJGQHS7Z","short_pith_number":"pith:EUSI362Q","canonical_record":{"source":{"id":"2606.26790","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","cross_cats_sorted":[],"title_canon_sha256":"57a214cfa9bbd9c7d86b2f841addbbcd13d252a55461bbf51c044f7ef6e03c41","abstract_canon_sha256":"aea3160d6b8adc9c0dc677640a99686eb1f1398f687152badf7220ddcbd9cce2"},"schema_version":"1.0"},"canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","source":{"kind":"arxiv","id":"2606.26790","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26790","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26790v1","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26790","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_12","alias_value":"EUSI362Q3K4H","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_16","alias_value":"EUSI362Q3K4HA3UA","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_8","alias_value":"EUSI362Q","created_at":"2026-06-26T01:15:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:EUSI362Q3K4HA3UAPBJJGQHS7Z","target":"record","payload":{"canonical_record":{"source":{"id":"2606.26790","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","cross_cats_sorted":[],"title_canon_sha256":"57a214cfa9bbd9c7d86b2f841addbbcd13d252a55461bbf51c044f7ef6e03c41","abstract_canon_sha256":"aea3160d6b8adc9c0dc677640a99686eb1f1398f687152badf7220ddcbd9cce2"},"schema_version":"1.0"},"canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:15:59.780766Z","signature_b64":"W7iXAZ9itdv5lBh4vFjCrkOAOmNUTEe9Ee0m9qzKimgy/OLOU0h401xuaM4fUUqwaPBpORpfbHAr1+qn1EEkCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","last_reissued_at":"2026-06-26T01:15:59.780381Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:15:59.780381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.26790","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:15:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tUi1ChM06nvBGRvyJYxmnO/CoFGANpwIDki1h6Zdj6F/8Absqdy4Sut96mXxdaKfWNsAnEKLNHVHzXj8nAgOBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T19:44:40.581583Z"},"content_sha256":"b2837d5ea331f03026dc6ff44121441b6cfea2dcf16bab6e589fc0c275852fd2","schema_version":"1.0","event_id":"sha256:b2837d5ea331f03026dc6ff44121441b6cfea2dcf16bab6e589fc0c275852fd2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:EUSI362Q3K4HA3UAPBJJGQHS7Z","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"OPID: On-Policy Skill Distillation for Agentic Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fan Zhang, Haoran Luo, Jianhua Tao, Jinyang Wu, Lang Feng, Shuai Zhang, Shuo Yang, Yuhao Shen, Zheng Lian, Zhengqi Wen, Zhengxi Lu","submitted_at":"2026-06-25T09:24:09Z","abstract_excerpt":"Outcome-based reinforcement learning provides a stable optimization backbone for language agents, but its sparse trajectory-level rewards provide little guidance on which intermediate decisions should be reinforced or suppressed. On-policy self-distillation offers dense token-level supervision, yet existing skill-conditioned variants often rely on external skill memories or retrieved privileged context, which are costly to maintain and can be mismatched with the state distribution induced by the current policy in multi-turn interaction. We propose \\textbf{OPID} (\\textbf{O}n-\\textbf{P}olicy Sk\\"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26790","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26790/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:15:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YpFYIAneNSNjYymWCDJFhgzWiaehhflD1vWPKql3F44fgRTDbayEjhjeWsh52unA1RkUERcrz5b1A2TrGT6mBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T19:44:40.581942Z"},"content_sha256":"692b9972358cbcca2ac163243a38734cb13a86a6107533369c60742660ca20eb","schema_version":"1.0","event_id":"sha256:692b9972358cbcca2ac163243a38734cb13a86a6107533369c60742660ca20eb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z/bundle.json","state_url":"https://pith.science/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T19:44:40Z","links":{"resolver":"https://pith.science/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z","bundle":"https://pith.science/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z/bundle.json","state":"https://pith.science/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EUSI362Q3K4HA3UAPBJJGQHS7Z/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EUSI362Q3K4HA3UAPBJJGQHS7Z","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"aea3160d6b8adc9c0dc677640a99686eb1f1398f687152badf7220ddcbd9cce2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","title_canon_sha256":"57a214cfa9bbd9c7d86b2f841addbbcd13d252a55461bbf51c044f7ef6e03c41"},"schema_version":"1.0","source":{"id":"2606.26790","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26790","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26790v1","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26790","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_12","alias_value":"EUSI362Q3K4H","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_16","alias_value":"EUSI362Q3K4HA3UA","created_at":"2026-06-26T01:15:59Z"},{"alias_kind":"pith_short_8","alias_value":"EUSI362Q","created_at":"2026-06-26T01:15:59Z"}],"graph_snapshots":[{"event_id":"sha256:692b9972358cbcca2ac163243a38734cb13a86a6107533369c60742660ca20eb","target":"graph","created_at":"2026-06-26T01:15:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.26790/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Outcome-based reinforcement learning provides a stable optimization backbone for language agents, but its sparse trajectory-level rewards provide little guidance on which intermediate decisions should be reinforced or suppressed. On-policy self-distillation offers dense token-level supervision, yet existing skill-conditioned variants often rely on external skill memories or retrieved privileged context, which are costly to maintain and can be mismatched with the state distribution induced by the current policy in multi-turn interaction. We propose \\textbf{OPID} (\\textbf{O}n-\\textbf{P}olicy Sk\\","authors_text":"Fan Zhang, Haoran Luo, Jianhua Tao, Jinyang Wu, Lang Feng, Shuai Zhang, Shuo Yang, Yuhao Shen, Zheng Lian, Zhengqi Wen, Zhengxi Lu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","title":"OPID: On-Policy Skill Distillation for Agentic Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26790","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b2837d5ea331f03026dc6ff44121441b6cfea2dcf16bab6e589fc0c275852fd2","target":"record","created_at":"2026-06-26T01:15:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"aea3160d6b8adc9c0dc677640a99686eb1f1398f687152badf7220ddcbd9cce2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:24:09Z","title_canon_sha256":"57a214cfa9bbd9c7d86b2f841addbbcd13d252a55461bbf51c044f7ef6e03c41"},"schema_version":"1.0","source":{"id":"2606.26790","kind":"arxiv","version":1}},"canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"25248dfb50dab8706e8078529340f2fe4022010abe7f9d8247af234b241f1803","first_computed_at":"2026-06-26T01:15:59.780381Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T01:15:59.780381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"W7iXAZ9itdv5lBh4vFjCrkOAOmNUTEe9Ee0m9qzKimgy/OLOU0h401xuaM4fUUqwaPBpORpfbHAr1+qn1EEkCg==","signature_status":"signed_v1","signed_at":"2026-06-26T01:15:59.780766Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.26790","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b2837d5ea331f03026dc6ff44121441b6cfea2dcf16bab6e589fc0c275852fd2","sha256:692b9972358cbcca2ac163243a38734cb13a86a6107533369c60742660ca20eb"],"state_sha256":"889e78062f7d80eca1a50446c8f12a948e51be831db04d84e15f93badef8cd6c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JoKiEYvIV+8bKK/UMKBwRBbkP+7izzv4OlsisprutJo4jlUaNZCoYwedtdxgB7bUTsyJWdyLuSNd+pptzwQrAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T19:44:40.583893Z","bundle_sha256":"e7891c499e04fb7cc1ae6f2a4fcb94ed7d5b1dd11611b253514c0cc78bd231bc"}}