{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:AKPT4QRHCFVF5H3ZYHEHZ5FAXF","short_pith_number":"pith:AKPT4QRH","schema_version":"1.0","canonical_sha256":"029f3e4227116a5e9f79c1c87cf4a0b9752234a6b90619e2f97d52eb8d57c69d","source":{"kind":"arxiv","id":"2606.26923","version":1},"attestation_state":"computed","paper":{"title":"GAVEL: Grounded Caption Error Verification and Localization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Atsushi Hashimoto, Kuniaki Saito, Zixian Gao","submitted_at":"2026-06-25T12:00:45Z","abstract_excerpt":"Vision-language models (VLMs) often produce hallucinated or inconsistent outputs, where text and images are not properly aligned. Addressing this issue requires not only detecting misalignment but also explaining the discrepancy and localizing its visual evidence. We introduce GAVEL (Grounded Caption Error Verification and Localization), a task that jointly addresses verification, explanation, and localization for image-text pairs. To support systematic evaluation, we also present a corresponding dataset and benchmark. We further train a supervised baseline on the human-annotated training spli"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.26923","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T12:00:45Z","cross_cats_sorted":[],"title_canon_sha256":"ffed647fbea39eca8e1248902e40ed0052780bcf88b49ec6bf4dde5805423fc0","abstract_canon_sha256":"4f018b4ff56fd919ed8d58c841d4b246300ccca637c7e9757bc886dfcb16230a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:16:04.213154Z","signature_b64":"Gp/C5FEJqEHt2Kym/YKU83NOXBKL9mcy49ciR294SPPeQuTJdENaBpM5mrnz00dsvr0waR1b0que6abP0CBqAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"029f3e4227116a5e9f79c1c87cf4a0b9752234a6b90619e2f97d52eb8d57c69d","last_reissued_at":"2026-06-26T01:16:04.212758Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:16:04.212758Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GAVEL: Grounded Caption Error Verification and Localization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Atsushi Hashimoto, Kuniaki Saito, Zixian Gao","submitted_at":"2026-06-25T12:00:45Z","abstract_excerpt":"Vision-language models (VLMs) often produce hallucinated or inconsistent outputs, where text and images are not properly aligned. Addressing this issue requires not only detecting misalignment but also explaining the discrepancy and localizing its visual evidence. We introduce GAVEL (Grounded Caption Error Verification and Localization), a task that jointly addresses verification, explanation, and localization for image-text pairs. To support systematic evaluation, we also present a corresponding dataset and benchmark. We further train a supervised baseline on the human-annotated training spli"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26923","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26923/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.26923","created_at":"2026-06-26T01:16:04.212820+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.26923v1","created_at":"2026-06-26T01:16:04.212820+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26923","created_at":"2026-06-26T01:16:04.212820+00:00"},{"alias_kind":"pith_short_12","alias_value":"AKPT4QRHCFVF","created_at":"2026-06-26T01:16:04.212820+00:00"},{"alias_kind":"pith_short_16","alias_value":"AKPT4QRHCFVF5H3Z","created_at":"2026-06-26T01:16:04.212820+00:00"},{"alias_kind":"pith_short_8","alias_value":"AKPT4QRH","created_at":"2026-06-26T01:16:04.212820+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF","json":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF.json","graph_json":"https://pith.science/api/pith-number/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/graph.json","events_json":"https://pith.science/api/pith-number/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/events.json","paper":"https://pith.science/paper/AKPT4QRH"},"agent_actions":{"view_html":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF","download_json":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF.json","view_paper":"https://pith.science/paper/AKPT4QRH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.26923&json=true","fetch_graph":"https://pith.science/api/pith-number/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/graph.json","fetch_events":"https://pith.science/api/pith-number/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/action/storage_attestation","attest_author":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/action/author_attestation","sign_citation":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/action/citation_signature","submit_replication":"https://pith.science/pith/AKPT4QRHCFVF5H3ZYHEHZ5FAXF/action/replication_record"}},"created_at":"2026-06-26T01:16:04.212820+00:00","updated_at":"2026-06-26T01:16:04.212820+00:00"}