{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TDMEP2TLUSOXJAT25FRJA2A52D","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"790c508a6a91475e6c345445cc45eb7d2d3d9bd022c0252ea519d60c26968785","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T22:06:07Z","title_canon_sha256":"d9eb8c40f61e65e779168af0410966e5a86fdeab209da39a1a6cfc85006ac486"},"schema_version":"1.0","source":{"id":"2606.07872","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07872","created_at":"2026-06-09T01:04:54Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07872v1","created_at":"2026-06-09T01:04:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07872","created_at":"2026-06-09T01:04:54Z"},{"alias_kind":"pith_short_12","alias_value":"TDMEP2TLUSOX","created_at":"2026-06-09T01:04:54Z"},{"alias_kind":"pith_short_16","alias_value":"TDMEP2TLUSOXJAT2","created_at":"2026-06-09T01:04:54Z"},{"alias_kind":"pith_short_8","alias_value":"TDMEP2TL","created_at":"2026-06-09T01:04:54Z"}],"graph_snapshots":[{"event_id":"sha256:7223905dd837690e25d68e9a5939930e51b9bb2111fb19e1606c64cd33493296","target":"graph","created_at":"2026-06-09T01:04:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07872/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"When a multimodal large language model answers a visual reasoning question correctly, is the prediction actually supported by the task-critical visual evidence? Correct answers can coexist with flawed reasoning, making accuracy alone an incomplete test of grounding. We introduce VisualFLIP, a paired benchmark with 1,374 images arranged as same-question perturbation pairs across cardinality, attribute, spatial, and logic tasks. Each pair keeps the question fixed but minimally changes the evidence so the gold answer deterministically flips. We evaluate 24 MLLMs with pair accuracy, which requires","authors_text":"Changrui Chen, Didi Zhu, Jiankang Deng, Stefanos Zafeiriou","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T22:06:07Z","title":"VisualFLIP: Do Predictions Depend on Task-Critical Visual Evidence in Multimodal Reasoning?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07872","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b4146e281880a8434a0bb3bacfd4762f4cd276fcba7f8ccff033e2c568294d7d","target":"record","created_at":"2026-06-09T01:04:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"790c508a6a91475e6c345445cc45eb7d2d3d9bd022c0252ea519d60c26968785","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T22:06:07Z","title_canon_sha256":"d9eb8c40f61e65e779168af0410966e5a86fdeab209da39a1a6cfc85006ac486"},"schema_version":"1.0","source":{"id":"2606.07872","kind":"arxiv","version":1}},"canonical_sha256":"98d847ea6ba49d74827ae96290681dd0ccef29b01475a800db9e5223a51dcb6a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"98d847ea6ba49d74827ae96290681dd0ccef29b01475a800db9e5223a51dcb6a","first_computed_at":"2026-06-09T01:04:54.243499Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:04:54.243499Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"R1YL9SnAI+yY8TkZdzMpM0LjOV/qbfTPophl1VavS6kIwITmSLd5rzeq9gFFRZjWLq4MwUOplm4ljnO+zN7EDQ==","signature_status":"signed_v1","signed_at":"2026-06-09T01:04:54.243881Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07872","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b4146e281880a8434a0bb3bacfd4762f4cd276fcba7f8ccff033e2c568294d7d","sha256:7223905dd837690e25d68e9a5939930e51b9bb2111fb19e1606c64cd33493296"],"state_sha256":"6f3eb3c42d92cd4e32ae385c09f1409d0c7e474bccfbf018ab36a6be3d5565cb"}