{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:RON6S63C5FNTXZ3WDG2A3Y42GD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"63e8823af031389dd04990d2d89a90d7ecd25493c886a4d3593d00dfdac31982","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T08:25:09Z","title_canon_sha256":"cd53a81e94771d1ad0b8b1c681b91e3e4bcbf2f5c513b10e155cd98957e2d3cd"},"schema_version":"1.0","source":{"id":"2606.11792","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11792","created_at":"2026-06-11T01:10:08Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11792v1","created_at":"2026-06-11T01:10:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11792","created_at":"2026-06-11T01:10:08Z"},{"alias_kind":"pith_short_12","alias_value":"RON6S63C5FNT","created_at":"2026-06-11T01:10:08Z"},{"alias_kind":"pith_short_16","alias_value":"RON6S63C5FNTXZ3W","created_at":"2026-06-11T01:10:08Z"},{"alias_kind":"pith_short_8","alias_value":"RON6S63C","created_at":"2026-06-11T01:10:08Z"}],"graph_snapshots":[{"event_id":"sha256:368db3240b546253fb45b5d4c582c77ef21ce61984649d08011fb56c05178c04","target":"graph","created_at":"2026-06-11T01:10:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.11792/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Video Large Multimodal Models have achieved remarkable progress in video understanding, yet they remain prone to hallucinations, where generated responses are not faithfully supported by the input video. In this paper, we propose MultiToP, a multimodal-context-aware visual token patching framework that mitigates hallucinations by refining unreliable visual tokens before language generation. MultiToP introduces a lightweight Visual Token Patcher to predict token-level replacement distributions and selectively substitute unreliable visual tokens with a dynamic global patch token. To train the pa","authors_text":"Han Bao, Jiahao Yuan, Kaiwen Zhou, Wenbin Xing, Wenzhi Chen, Yuansheng Gao, Zonghui Wang","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T08:25:09Z","title":"MultiToP: Learning to Patch Visual Tokens to Mitigate Hallucinations in Video Large Multimodal Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11792","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2669685a27c0727934219bbb98360b22f15c32f61c101e01ea7df86693472d4a","target":"record","created_at":"2026-06-11T01:10:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"63e8823af031389dd04990d2d89a90d7ecd25493c886a4d3593d00dfdac31982","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T08:25:09Z","title_canon_sha256":"cd53a81e94771d1ad0b8b1c681b91e3e4bcbf2f5c513b10e155cd98957e2d3cd"},"schema_version":"1.0","source":{"id":"2606.11792","kind":"arxiv","version":1}},"canonical_sha256":"8b9be97b62e95b3be77619b40de39a30f2891b5f6a93cae3c288de2578f22315","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8b9be97b62e95b3be77619b40de39a30f2891b5f6a93cae3c288de2578f22315","first_computed_at":"2026-06-11T01:10:08.071762Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:10:08.071762Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"AgMB8tQmKMo3J8l+KA6gEgf/mL/rsoMP3BckULCs9MC/BWDwz1yvqWoNPYAEESSzAdT8Hw09oyBU6iY5/nSpCg==","signature_status":"signed_v1","signed_at":"2026-06-11T01:10:08.072589Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.11792","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2669685a27c0727934219bbb98360b22f15c32f61c101e01ea7df86693472d4a","sha256:368db3240b546253fb45b5d4c582c77ef21ce61984649d08011fb56c05178c04"],"state_sha256":"4cddb4b548c9c330e33d346adb8d21aaf561b1e64cc017182dfe119922a8ff8f"}