{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:4NJT54U5HB4W7GOKX336DHE7O6","short_pith_number":"pith:4NJT54U5","schema_version":"1.0","canonical_sha256":"e3533ef29d38796f99cabef7e19c9f77b23afcf78e6ea73808080580daacc0a5","source":{"kind":"arxiv","id":"2602.18746","version":3},"attestation_state":"computed","paper":{"title":"Bridging Modality Disconnect in Self-Reflection via Closed-Loop Visually Grounded Verification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Che Sun, Haoyu Zhang, Mingyang Gao, Pengxiang Li, Rui Gao, Xintong Zhang, Yunde Jia, Yuwei Wu, Zhi Gao","submitted_at":"2026-02-21T07:56:59Z","abstract_excerpt":"In the era of Vision-Language Models (VLMs), enhancing multimodal reasoning capabilities remains a critical challenge, particularly in handling ambiguous or complex visual inputs, where initial inferences often lead to hallucinations or logic errors. Existing VLMs often produce plausible yet ungrounded answers, and even when prompted to \"reflect\", their corrections may remain detached from the image evidence. To address this, we propose the MIRROR framework for Multimodal Iterative Reasoning via Reflection On visual Regions. By embedding visual reflection as a core mechanism, MIRROR is formula"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.18746","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-02-21T07:56:59Z","cross_cats_sorted":[],"title_canon_sha256":"6bcda7d70ffcb72f55c670f7ac58a8db50220668cbc5938c50701f0b12be430b","abstract_canon_sha256":"8bfd7d186419e44f67fa2a4d93f132a8ff828ab9af06984b9a39310b90958bc0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:09:55.244824Z","signature_b64":"DcxSaRrwPbYkmsOYv/ntOEdSQ61j9gzv8MApPrXFtqX+QU6mvUPURBg76+V8ahBksuU2C/9K2MfJnoqcnFVQDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e3533ef29d38796f99cabef7e19c9f77b23afcf78e6ea73808080580daacc0a5","last_reissued_at":"2026-06-19T16:09:55.244404Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:09:55.244404Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Bridging Modality Disconnect in Self-Reflection via Closed-Loop Visually Grounded Verification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Che Sun, Haoyu Zhang, Mingyang Gao, Pengxiang Li, Rui Gao, Xintong Zhang, Yunde Jia, Yuwei Wu, Zhi Gao","submitted_at":"2026-02-21T07:56:59Z","abstract_excerpt":"In the era of Vision-Language Models (VLMs), enhancing multimodal reasoning capabilities remains a critical challenge, particularly in handling ambiguous or complex visual inputs, where initial inferences often lead to hallucinations or logic errors. Existing VLMs often produce plausible yet ungrounded answers, and even when prompted to \"reflect\", their corrections may remain detached from the image evidence. To address this, we propose the MIRROR framework for Multimodal Iterative Reasoning via Reflection On visual Regions. By embedding visual reflection as a core mechanism, MIRROR is formula"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.18746","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.18746/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.18746","created_at":"2026-06-19T16:09:55.244460+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.18746v3","created_at":"2026-06-19T16:09:55.244460+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.18746","created_at":"2026-06-19T16:09:55.244460+00:00"},{"alias_kind":"pith_short_12","alias_value":"4NJT54U5HB4W","created_at":"2026-06-19T16:09:55.244460+00:00"},{"alias_kind":"pith_short_16","alias_value":"4NJT54U5HB4W7GOK","created_at":"2026-06-19T16:09:55.244460+00:00"},{"alias_kind":"pith_short_8","alias_value":"4NJT54U5","created_at":"2026-06-19T16:09:55.244460+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6","json":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6.json","graph_json":"https://pith.science/api/pith-number/4NJT54U5HB4W7GOKX336DHE7O6/graph.json","events_json":"https://pith.science/api/pith-number/4NJT54U5HB4W7GOKX336DHE7O6/events.json","paper":"https://pith.science/paper/4NJT54U5"},"agent_actions":{"view_html":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6","download_json":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6.json","view_paper":"https://pith.science/paper/4NJT54U5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.18746&json=true","fetch_graph":"https://pith.science/api/pith-number/4NJT54U5HB4W7GOKX336DHE7O6/graph.json","fetch_events":"https://pith.science/api/pith-number/4NJT54U5HB4W7GOKX336DHE7O6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6/action/storage_attestation","attest_author":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6/action/author_attestation","sign_citation":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6/action/citation_signature","submit_replication":"https://pith.science/pith/4NJT54U5HB4W7GOKX336DHE7O6/action/replication_record"}},"created_at":"2026-06-19T16:09:55.244460+00:00","updated_at":"2026-06-19T16:09:55.244460+00:00"}