{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:7YTTPW2FGATVYYGHCM4ZOKR3SA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e7078a1e1630406e9f1e1f578d68c06ca66aa41de04d723eda73f9d5f3adfd42","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-24T21:12:24Z","title_canon_sha256":"1fb774185f2ddb930d67cc3c6b4c9092409e57418920b65e7d0bc7d1cae71cc2"},"schema_version":"1.0","source":{"id":"2606.26387","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26387","created_at":"2026-06-26T00:15:39Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26387v1","created_at":"2026-06-26T00:15:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26387","created_at":"2026-06-26T00:15:39Z"},{"alias_kind":"pith_short_12","alias_value":"7YTTPW2FGATV","created_at":"2026-06-26T00:15:39Z"},{"alias_kind":"pith_short_16","alias_value":"7YTTPW2FGATVYYGH","created_at":"2026-06-26T00:15:39Z"},{"alias_kind":"pith_short_8","alias_value":"7YTTPW2F","created_at":"2026-06-26T00:15:39Z"}],"graph_snapshots":[{"event_id":"sha256:ac7ab20a37dc4354ff7141bb2fea00e0929d7570ec22a0e133acdf75b1e41761","target":"graph","created_at":"2026-06-26T00:15:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.26387/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Multimodal large language models (MLLMs) extend large language models (LLMs) with visual perception, enabling joint reasoning over images and text. Despite inheriting strong reasoning capabilities from LLMs, they remain prone to hallucinations that contradict their visual inputs. Mechanistic studies indicate that this weakness stems from visual laziness: MLLMs encode the correct visual evidence internally, but overly rely on strong language priors during response. Existing alignment methods, such as direct preference optimization, primarily optimize outcome-level rewards based on text. This in","authors_text":"Chen Liu, Chih-Ting Liao, Hao Xu, Janet Wang, Jianyang Gu, Lin Zhao, Muchao Ye, Qizhen Lan, Tianyang Wang, Xi Xiao, Yunbei Zhang, Yuxiang Wei","cross_cats":["cs.CL","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-24T21:12:24Z","title":"Staying VIGILant: Mitigating Visual Laziness via Counterfactual Visual Alignment in MLLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26387","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f2d864996a02250ac5c69db61b714e356637054ed63f05088fc50f82ffd5ff13","target":"record","created_at":"2026-06-26T00:15:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e7078a1e1630406e9f1e1f578d68c06ca66aa41de04d723eda73f9d5f3adfd42","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-24T21:12:24Z","title_canon_sha256":"1fb774185f2ddb930d67cc3c6b4c9092409e57418920b65e7d0bc7d1cae71cc2"},"schema_version":"1.0","source":{"id":"2606.26387","kind":"arxiv","version":1}},"canonical_sha256":"fe2737db4530275c60c71339972a3b90077d44572e074fc07b254f048915b8ec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fe2737db4530275c60c71339972a3b90077d44572e074fc07b254f048915b8ec","first_computed_at":"2026-06-26T00:15:39.002868Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T00:15:39.002868Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4imfFrAyHnxlxVK2J/yrrw9xVi0P3Ev5jYIvitEcsAaerpXSgwZyOnqBVYNbzQ3qJvZOaPJ4i3IvfkLcJhTUAg==","signature_status":"signed_v1","signed_at":"2026-06-26T00:15:39.003270Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.26387","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f2d864996a02250ac5c69db61b714e356637054ed63f05088fc50f82ffd5ff13","sha256:ac7ab20a37dc4354ff7141bb2fea00e0929d7570ec22a0e133acdf75b1e41761"],"state_sha256":"39e9c1a56797e3dd7f07b1c377725f7f2ff79d3107a55823658f670c14d83df4"}