{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DCPD5KYOJXRBTXMUD2RTK66FEM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f5f167207f8945e73c28884c421427aecc8cc4e97fd6567dbb3b435b811d4963","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-05T07:43:22Z","title_canon_sha256":"2cdeac450906fd6f9cac0027f3cd635181730d5a4651e15109c146daa882d000"},"schema_version":"1.0","source":{"id":"2606.07000","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07000","created_at":"2026-06-08T01:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07000v1","created_at":"2026-06-08T01:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07000","created_at":"2026-06-08T01:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"DCPD5KYOJXRB","created_at":"2026-06-08T01:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"DCPD5KYOJXRBTXMU","created_at":"2026-06-08T01:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"DCPD5KYO","created_at":"2026-06-08T01:04:40Z"}],"graph_snapshots":[{"event_id":"sha256:11d6f177bce6ca3607625a35dc0e3cb855093e302e64d8293d575ac771663faf","target":"graph","created_at":"2026-06-08T01:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07000/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent post-training methods, particularly Reinforcement Learning with Verifiable Rewards (RLVR), have significantly enhanced the reasoning ability of Large Vision-Language Models (LVLMs). However, the sparse nature of verifiable rewards provides little token-level supervision for failed rollouts, often leading to inefficient exploration in complex multimodal reasoning tasks. Although policy distillation can offer dense guidance, external teacher based methods introduce substantial computational overhead, while answer conditioned tuning methods may expose answer-level information and induce sh","authors_text":"Jian Luan, Ke An, Pei Fu, Qilong Wang, Shizhe Xiang, Wenlong Yu, Yue Liu","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-05T07:43:22Z","title":"Teaching the Way, Not the Answer: Privileged Tutoring Distillation for Multimodal Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07000","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b5e224fab58e3e9b560a1e867737877ad58441dda97c33233870730b5c818e34","target":"record","created_at":"2026-06-08T01:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f5f167207f8945e73c28884c421427aecc8cc4e97fd6567dbb3b435b811d4963","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-05T07:43:22Z","title_canon_sha256":"2cdeac450906fd6f9cac0027f3cd635181730d5a4651e15109c146daa882d000"},"schema_version":"1.0","source":{"id":"2606.07000","kind":"arxiv","version":1}},"canonical_sha256":"189e3eab0e4de219dd941ea3357bc5233f9fbde93bc72bdc84ec483acb23fbad","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"189e3eab0e4de219dd941ea3357bc5233f9fbde93bc72bdc84ec483acb23fbad","first_computed_at":"2026-06-08T01:04:40.534557Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:04:40.534557Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EUcBYKabvQtv0ugiu0kel9keae54iSVoe4MOor/rMVMKKPPPnmEi8Wm/6NB2Hp1Yqj6O2egEElGKlO3lYkArCg==","signature_status":"signed_v1","signed_at":"2026-06-08T01:04:40.535408Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07000","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b5e224fab58e3e9b560a1e867737877ad58441dda97c33233870730b5c818e34","sha256:11d6f177bce6ca3607625a35dc0e3cb855093e302e64d8293d575ac771663faf"],"state_sha256":"9eb9a7f187320919c3a8eb5c8c0547262deb422c76dbf330a499151697d9bc49"}