{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:E5JZWUAYMP5JRYZTBQPZHO5G2X","short_pith_number":"pith:E5JZWUAY","canonical_record":{"source":{"id":"2605.06094","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-07T12:13:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"93d04c2f3cc99c20cb363f9751e2ec4788e7659aed38c0cea8dddb485f8c67e8","abstract_canon_sha256":"66047564e89d9049c1c3abc2e89c63810c04935c8b1b3eb8ab2084e8a2cb3c3b"},"schema_version":"1.0"},"canonical_sha256":"27539b501863fa98e3330c1f93bba6d5d2f766536f0779258990c2093f428886","source":{"kind":"arxiv","id":"2605.06094","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.06094","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"arxiv_version","alias_value":"2605.06094v4","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.06094","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_12","alias_value":"E5JZWUAYMP5J","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_16","alias_value":"E5JZWUAYMP5JRYZT","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_8","alias_value":"E5JZWUAY","created_at":"2026-05-25T02:01:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:E5JZWUAYMP5JRYZTBQPZHO5G2X","target":"record","payload":{"canonical_record":{"source":{"id":"2605.06094","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-07T12:13:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"93d04c2f3cc99c20cb363f9751e2ec4788e7659aed38c0cea8dddb485f8c67e8","abstract_canon_sha256":"66047564e89d9049c1c3abc2e89c63810c04935c8b1b3eb8ab2084e8a2cb3c3b"},"schema_version":"1.0"},"canonical_sha256":"27539b501863fa98e3330c1f93bba6d5d2f766536f0779258990c2093f428886","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:22.118519Z","signature_b64":"EHlS6wZ80pI69Qh40Vz62qgqVfJApX4tdwltWd/6YJixB+GoZwN/4uviZIh1rw0MXToT79GGkN1GJOwDun3mDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"27539b501863fa98e3330c1f93bba6d5d2f766536f0779258990c2093f428886","last_reissued_at":"2026-05-25T02:01:22.117869Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:22.117869Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.06094","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0G9QU0q1MBZLDrkdTavjKw2SCQsZ2b/4qLq1p8uk782Y74AITKd5eumXyU9npyZ9Xo70v21P5kmc4AvFVkz5DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:47:36.716600Z"},"content_sha256":"35a42109cf794418ea89126dc8878d600b227d8dbfc92bdbe4fa6e7a7474e2a1","schema_version":"1.0","event_id":"sha256:35a42109cf794418ea89126dc8878d600b227d8dbfc92bdbe4fa6e7a7474e2a1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:E5JZWUAYMP5JRYZTBQPZHO5G2X","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VISD: Enhancing Video Reasoning via Structured Self-Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Structured self-distillation with a video-aware judge improves VideoLLM reasoning accuracy and training efficiency.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Hao Lin, Hongbo Jin, Jiayu Ding, Jingqi Tian, Kunyang Lv, Qiaoman Zhang, Xu Jiang, Zhongjing Du","submitted_at":"2026-05-07T12:13:15Z","abstract_excerpt":"Training VideoLLMs for complex reasoning remains challenging due to sparse sequence level rewards and the lack of fine grained credit assignment over long, temporally grounded reasoning trajectories. While reinforcement learning with verifiable rewards (RLVR) provides reliable supervision, it fails to capture token level contributions, leading to inefficient learning. Conversely, existing self distillation methods offer dense supervision but lack structure and diagnostic specificity, and often interact unstably with reinforcement learning. In this work, we propose VISD, a structured self disti"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on diverse benchmarks show that VISD consistently outperforms strong baselines, improving answer accuracy and spatio-temporal grounding quality. Notably, VISD reaches these gains with nearly 2x faster convergence in optimization steps.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The video-aware judge model produces diagnostically meaningful, unbiased privileged information that can be safely used for token-level supervision without introducing new failure modes or reward hacking.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"VISD adds structured privileged feedback from a judge model and a direction-magnitude decoupling trick to let VideoLLMs learn token-level credit assignment while keeping RL stable, yielding higher accuracy and roughly 2x faster convergence on video reasoning benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Structured self-distillation with a video-aware judge improves VideoLLM reasoning accuracy and training efficiency.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"181f09371073185e57e6fe05ed384482ee6d72fe8932e596939b6a82fdfdb8d1"},"source":{"id":"2605.06094","kind":"arxiv","version":4},"verdict":{"id":"6feb4323-4321-4310-814e-c8311dbf84b5","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-12T03:31:46.483302Z","strongest_claim":"Experiments on diverse benchmarks show that VISD consistently outperforms strong baselines, improving answer accuracy and spatio-temporal grounding quality. Notably, VISD reaches these gains with nearly 2x faster convergence in optimization steps.","one_line_summary":"VISD adds structured privileged feedback from a judge model and a direction-magnitude decoupling trick to let VideoLLMs learn token-level credit assignment while keeping RL stable, yielding higher accuracy and roughly 2x faster convergence on video reasoning benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The video-aware judge model produces diagnostically meaningful, unbiased privileged information that can be safely used for token-level supervision without introducing new failure modes or reward hacking.","pith_extraction_headline":"Structured self-distillation with a video-aware judge improves VideoLLM reasoning accuracy and training efficiency."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.06094/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-20T13:02:04.335373Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-20T08:37:29.823452Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T19:01:19.508982Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T12:56:53.589840Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"5e9f2ca24d48aa4457497865cad55fab9a87a3308deace055d893a9e37af3741"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"6feb4323-4321-4310-814e-c8311dbf84b5"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4ME1tdbxxB6Gf4c8rQe7U/JKGb+lDCzGoNyq8vYsM+tWuiCyO78LgcfUZSJ77v8b2ERzu0JDOtfVleZJAc6VDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:47:36.717557Z"},"content_sha256":"eb31d9a45caf515a795f38d2d3e42c3a602f1bb598c8a13df823ca6a11245391","schema_version":"1.0","event_id":"sha256:eb31d9a45caf515a795f38d2d3e42c3a602f1bb598c8a13df823ca6a11245391"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X/bundle.json","state_url":"https://pith.science/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T23:47:36Z","links":{"resolver":"https://pith.science/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X","bundle":"https://pith.science/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X/bundle.json","state":"https://pith.science/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X/state.json","well_known_bundle":"https://pith.science/.well-known/pith/E5JZWUAYMP5JRYZTBQPZHO5G2X/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:E5JZWUAYMP5JRYZTBQPZHO5G2X","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"66047564e89d9049c1c3abc2e89c63810c04935c8b1b3eb8ab2084e8a2cb3c3b","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-07T12:13:15Z","title_canon_sha256":"93d04c2f3cc99c20cb363f9751e2ec4788e7659aed38c0cea8dddb485f8c67e8"},"schema_version":"1.0","source":{"id":"2605.06094","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.06094","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"arxiv_version","alias_value":"2605.06094v4","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.06094","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_12","alias_value":"E5JZWUAYMP5J","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_16","alias_value":"E5JZWUAYMP5JRYZT","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_8","alias_value":"E5JZWUAY","created_at":"2026-05-25T02:01:22Z"}],"graph_snapshots":[{"event_id":"sha256:eb31d9a45caf515a795f38d2d3e42c3a602f1bb598c8a13df823ca6a11245391","target":"graph","created_at":"2026-05-25T02:01:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments on diverse benchmarks show that VISD consistently outperforms strong baselines, improving answer accuracy and spatio-temporal grounding quality. Notably, VISD reaches these gains with nearly 2x faster convergence in optimization steps."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The video-aware judge model produces diagnostically meaningful, unbiased privileged information that can be safely used for token-level supervision without introducing new failure modes or reward hacking."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"VISD adds structured privileged feedback from a judge model and a direction-magnitude decoupling trick to let VideoLLMs learn token-level credit assignment while keeping RL stable, yielding higher accuracy and roughly 2x faster convergence on video reasoning benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Structured self-distillation with a video-aware judge improves VideoLLM reasoning accuracy and training efficiency."}],"snapshot_sha256":"181f09371073185e57e6fe05ed384482ee6d72fe8932e596939b6a82fdfdb8d1"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-20T13:02:04.335373Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T08:37:29.823452Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T19:01:19.508982Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T12:56:53.589840Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.06094/integrity.json","findings":[],"snapshot_sha256":"5e9f2ca24d48aa4457497865cad55fab9a87a3308deace055d893a9e37af3741","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Training VideoLLMs for complex reasoning remains challenging due to sparse sequence level rewards and the lack of fine grained credit assignment over long, temporally grounded reasoning trajectories. While reinforcement learning with verifiable rewards (RLVR) provides reliable supervision, it fails to capture token level contributions, leading to inefficient learning. Conversely, existing self distillation methods offer dense supervision but lack structure and diagnostic specificity, and often interact unstably with reinforcement learning. In this work, we propose VISD, a structured self disti","authors_text":"Hao Lin, Hongbo Jin, Jiayu Ding, Jingqi Tian, Kunyang Lv, Qiaoman Zhang, Xu Jiang, Zhongjing Du","cross_cats":["cs.AI"],"headline":"Structured self-distillation with a video-aware judge improves VideoLLM reasoning accuracy and training efficiency.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-07T12:13:15Z","title":"VISD: Enhancing Video Reasoning via Structured Self-Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.06094","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-12T03:31:46.483302Z","id":"6feb4323-4321-4310-814e-c8311dbf84b5","model_set":{"reader":"grok-4.3"},"one_line_summary":"VISD adds structured privileged feedback from a judge model and a direction-magnitude decoupling trick to let VideoLLMs learn token-level credit assignment while keeping RL stable, yielding higher accuracy and roughly 2x faster convergence on video reasoning benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Structured self-distillation with a video-aware judge improves VideoLLM reasoning accuracy and training efficiency.","strongest_claim":"Experiments on diverse benchmarks show that VISD consistently outperforms strong baselines, improving answer accuracy and spatio-temporal grounding quality. Notably, VISD reaches these gains with nearly 2x faster convergence in optimization steps.","weakest_assumption":"The video-aware judge model produces diagnostically meaningful, unbiased privileged information that can be safely used for token-level supervision without introducing new failure modes or reward hacking."}},"verdict_id":"6feb4323-4321-4310-814e-c8311dbf84b5"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:35a42109cf794418ea89126dc8878d600b227d8dbfc92bdbe4fa6e7a7474e2a1","target":"record","created_at":"2026-05-25T02:01:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"66047564e89d9049c1c3abc2e89c63810c04935c8b1b3eb8ab2084e8a2cb3c3b","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-07T12:13:15Z","title_canon_sha256":"93d04c2f3cc99c20cb363f9751e2ec4788e7659aed38c0cea8dddb485f8c67e8"},"schema_version":"1.0","source":{"id":"2605.06094","kind":"arxiv","version":4}},"canonical_sha256":"27539b501863fa98e3330c1f93bba6d5d2f766536f0779258990c2093f428886","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"27539b501863fa98e3330c1f93bba6d5d2f766536f0779258990c2093f428886","first_computed_at":"2026-05-25T02:01:22.117869Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:22.117869Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EHlS6wZ80pI69Qh40Vz62qgqVfJApX4tdwltWd/6YJixB+GoZwN/4uviZIh1rw0MXToT79GGkN1GJOwDun3mDQ==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:22.118519Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.06094","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:35a42109cf794418ea89126dc8878d600b227d8dbfc92bdbe4fa6e7a7474e2a1","sha256:eb31d9a45caf515a795f38d2d3e42c3a602f1bb598c8a13df823ca6a11245391"],"state_sha256":"ed7914a4b4009309646fcf2cf888d1a7074b0d987e1aa50f9a74f1bb19104087"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uE0t1+hlrJ7wtph2D7otDa+4xlkst5Yr5cBnO2sGNOn8IXYPWUVW8e7T4ebhiy1kuH5oEg5DjRKxUCYxXi2fBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T23:47:36.721558Z","bundle_sha256":"3f388a39d6d845343076eece441b73a055d06515c50288af8234d30113813750"}}