{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:SMUSK7IXTVJTAMSNRIWZLWYFP6","short_pith_number":"pith:SMUSK7IX","canonical_record":{"source":{"id":"2605.10893","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-11T17:35:10Z","cross_cats_sorted":[],"title_canon_sha256":"520b3966640d7ace8afa9a7ea8c97592ce5ac4545ecd0961391f3fd8adb4001e","abstract_canon_sha256":"a387993af524cff36faabb839a651d8c05bc86143c44b1cce94dc3d3dcf8da7a"},"schema_version":"1.0"},"canonical_sha256":"9329257d179d5330324d8a2d95db057fb2ac517f81a169b7324ac6e87e36a8c8","source":{"kind":"arxiv","id":"2605.10893","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.10893","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"arxiv_version","alias_value":"2605.10893v2","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.10893","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"pith_short_12","alias_value":"SMUSK7IXTVJT","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"pith_short_16","alias_value":"SMUSK7IXTVJTAMSN","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"pith_short_8","alias_value":"SMUSK7IX","created_at":"2026-05-20T00:00:42Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:SMUSK7IXTVJTAMSNRIWZLWYFP6","target":"record","payload":{"canonical_record":{"source":{"id":"2605.10893","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-11T17:35:10Z","cross_cats_sorted":[],"title_canon_sha256":"520b3966640d7ace8afa9a7ea8c97592ce5ac4545ecd0961391f3fd8adb4001e","abstract_canon_sha256":"a387993af524cff36faabb839a651d8c05bc86143c44b1cce94dc3d3dcf8da7a"},"schema_version":"1.0"},"canonical_sha256":"9329257d179d5330324d8a2d95db057fb2ac517f81a169b7324ac6e87e36a8c8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:42.570530Z","signature_b64":"o9kNqVPLg92MkjJTbYW8ISpnjmuWdoB6xsXqaa3oXuEmnRpPleNQfc4MpiwCFdcPOJDILpoDtjBkC9HrqUYgBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9329257d179d5330324d8a2d95db057fb2ac517f81a169b7324ac6e87e36a8c8","last_reissued_at":"2026-05-20T00:00:42.569712Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:42.569712Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.10893","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"e3U0EQokHHOiWBrlV3VKyKTpHQfyk3kNKFHGcMtUCNy8KZazq04M8q8NP48B76V8VDWt53xL7Fbi8P9DrZ3aBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T06:42:21.545551Z"},"content_sha256":"54b6b9125ab08ab17ae4e4d04b31b4d37a3857cc6534055dfc9048e29845c39f","schema_version":"1.0","event_id":"sha256:54b6b9125ab08ab17ae4e4d04b31b4d37a3857cc6534055dfc9048e29845c39f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:SMUSK7IXTVJTAMSNRIWZLWYFP6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Grounded or Guessing? LVLM Confidence Estimation via Blind-Image Contrastive Ranking","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Training probes to prefer real images over blacked-out ones lets them detect when vision-language models actually use visual input for their answers.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Charese H. Smiley, Erfan Miahi, Ivan Brugere, Kundan Thind, Mohammad M. Ghassemi, Reza Khanmohammadi, Simerjot Kaur","submitted_at":"2026-05-11T17:35:10Z","abstract_excerpt":"Large vision-language models suffer from visual ungroundedness: they can produce a fluent, confident, and even correct response driven entirely by language priors, with the image contributing nothing to the prediction. Existing confidence estimation methods cannot detect this, as they observe model behavior under normal inference with no mechanism to determine whether a prediction was shaped by the image or by text alone. We introduce BICR (Blind-Image Contrastive Ranking), a model-agnostic confidence estimation framework that makes this contrast explicit during training by extracting hidden s"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"BICR achieves the best cross-LVLM average on both calibration and discrimination simultaneously, with statistically significant discrimination gains robust to cluster-aware analysis at 4-18x fewer parameters than the strongest probing baseline.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That penalizing higher confidence on the blacked-out image view via ranking loss on hidden states will cause the probe to reliably treat the presence of visual information as a signal of prediction reliability (abstract, paragraph describing the training objective).","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"BICR trains a lightweight probe on contrastive hidden states from real versus blind images to detect visual grounding in LVLM predictions, outperforming baselines on calibration and discrimination with fewer parameters.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Training probes to prefer real images over blacked-out ones lets them detect when vision-language models actually use visual input for their answers.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"ba5dc5b77126998e32c2d3d816ee0c971f296ebdaf023a2c291fac1e7106a93a"},"source":{"id":"2605.10893","kind":"arxiv","version":2},"verdict":{"id":"0dd4474e-0031-4bdc-9e24-459b55e2c4ad","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T17:04:36.427796Z","strongest_claim":"BICR achieves the best cross-LVLM average on both calibration and discrimination simultaneously, with statistically significant discrimination gains robust to cluster-aware analysis at 4-18x fewer parameters than the strongest probing baseline.","one_line_summary":"BICR trains a lightweight probe on contrastive hidden states from real versus blind images to detect visual grounding in LVLM predictions, outperforming baselines on calibration and discrimination with fewer parameters.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That penalizing higher confidence on the blacked-out image view via ranking loss on hidden states will cause the probe to reliably treat the presence of visual information as a signal of prediction reliability (abstract, paragraph describing the training objective).","pith_extraction_headline":"Training probes to prefer real images over blacked-out ones lets them detect when vision-language models actually use visual input for their answers."},"integrity":{"clean":false,"summary":{"advisory":2,"critical":0,"by_detector":{"doi_compliance":{"total":2,"advisory":2,"critical":0,"informational":0}},"informational":0},"endpoint":"/pith/2605.10893/integrity.json","findings":[{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detector":"doi_compliance","severity":"advisory","ref_index":3,"audited_at":"2026-05-19T08:55:11.671278Z","detected_doi":"10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11","finding_type":"recoverable_identifier","verdict_class":"incontrovertible","detected_arxiv_id":null},{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detector":"doi_compliance","severity":"advisory","ref_index":12,"audited_at":"2026-05-19T08:55:11.671278Z","detected_doi":"10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/","finding_type":"recoverable_identifier","verdict_class":"incontrovertible","detected_arxiv_id":null}],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T14:33:45.379386Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T10:31:17.335542Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T08:55:11.671278Z","status":"completed","version":"1.0.0","findings_count":2}],"snapshot_sha256":"68fae0da717fbc8be76998cfc655f9f5b76fdcd95b1306042502a689b16d15b9"},"references":{"count":61,"sample":[{"doi":"","year":2025,"title":"Mitigating hallucination in large vision- language models via modular attribution and intervention","work_id":"c346e8c3-51df-4829-a1aa-13fe41858f17","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Don’t miss the forest for the trees: Attentional vision calibration for large vision language models","work_id":"60c01516-b90a-4dea-a2ff-215896e2de46","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.18653/v1/","year":2025,"title":"doi: 10.18653/v1/ 2024.findings-acl.586","work_id":"8d675bdd-79ca-48d6-9163-fc17ce0e8ece","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Hidden in plain sight: VLMs overlook their visual representations","work_id":"05ae64ec-2694-4d04-9002-80813085820a","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.18653/v1/2024.findings-emnlp.262","year":2024,"title":"Reference- free hallucination detection for large vision-language models","work_id":"9851845e-05e5-4691-84d9-fa8f4df8d535","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":61,"snapshot_sha256":"a853f4e18b2de02769827e59c3b689e072c5f67338321bba1cd926b4159d93ea","internal_anchors":11},"formal_canon":{"evidence_count":2,"snapshot_sha256":"1e993af0af2940dea1ffc1168998a42163bb6129ac79bbbbccf3f4abd552d79b"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"0dd4474e-0031-4bdc-9e24-459b55e2c4ad"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2WRs8+edgX/VFaNH5E/sVMnz8rVXYidwMipKr8b+dYJDpSW7dW1NZ/G6QEs0vjQTiJKxWPB/KFKMD9XLMyZwBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T06:42:21.546376Z"},"content_sha256":"92d7878286b9296092fb53062d0fcf7dc084255ec77897f60cf75b66549173a7","schema_version":"1.0","event_id":"sha256:92d7878286b9296092fb53062d0fcf7dc084255ec77897f60cf75b66549173a7"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:SMUSK7IXTVJTAMSNRIWZLWYFP6","target":"integrity","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Qingcheng Zeng, Weihao Xuan, Leyang Cui, and Rob V oigt. Thinking out loud: Do reasoning models know when they’re right? In Christos Christodoulopoulos, Tanmoy Chakraborty, Carolyn Rose, and Violet Peng, editors,Proceedings of the 2025 Conf","arxiv_id":"2605.10893","detector":"doi_compliance","evidence":{"ref_index":12,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Qingcheng Zeng, Weihao Xuan, Leyang Cui, and Rob V oigt. Thinking out loud: Do reasoning models know when they’re right? In Christos Christodoulopoulos, Tanmoy Chakraborty, Carolyn Rose, and Violet Peng, editors,Proceedings of the 2025 Conf","reconstructed_doi":"10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/"},"severity":"advisory","ref_index":12,"audited_at":"2026-05-19T08:55:11.671278Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"7e4908c58d6354c2e6bd1faacc6066fa4434546e74318f4d4a0892a5005c5567","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":533,"payload_sha256":"d9fbd03375744c7e2809ac800c57dbcd2a71a90e2bcdffd1f472d4b2f361722d","signature_b64":"Ok+vxHAYgE3Mld8DSlO6BRGXxTJJ2QJOUPdZ8M9BcGrD/C86iuiPSh82YBR0FyUcLKGN8DleWEmsk3/0qi1eAg==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-19T08:56:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hCSrcCsYoihlDBwKysZ4A8kbeGG3bP4uftJMtGGv5N5ZpUDk5GJ+HspI0bAzsOwg+6wFU/6r/BG5edj+vJIUDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T06:42:21.547328Z"},"content_sha256":"67ed4dc696e2cc029cf9d1c3979af11d5da336cee4bb239de3d6fbe7e02043cb","schema_version":"1.0","event_id":"sha256:67ed4dc696e2cc029cf9d1c3979af11d5da336cee4bb239de3d6fbe7e02043cb"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:SMUSK7IXTVJTAMSNRIWZLWYFP6","target":"integrity","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Association for Computational Linguistics. ISBN 979-8-89176-256-5. doi: 10.18653/v1/ 2025.findings-acl.99. URLhttps://aclanthology.org/2025.findings-acl.99/. 11","arxiv_id":"2605.10893","detector":"doi_compliance","evidence":{"ref_index":3,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Association for Computational Linguistics. ISBN 979-8-89176-256-5. doi: 10.18653/v1/ 2025.findings-acl.99. URLhttps://aclanthology.org/2025.findings-acl.99/. 11","reconstructed_doi":"10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11"},"severity":"advisory","ref_index":3,"audited_at":"2026-05-19T08:55:11.671278Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"193aa245a3c6e95879a626ca5c31b2b090b4f926d168165ef438c2f8ab8ef5af","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":532,"payload_sha256":"27e5e2c91ca0e83079a35ea24bc7dfae66c2a3c43641a3574b2a9f8c3e66f849","signature_b64":"kLQ53QQsTREHKmSCOWGh4FdU09E6F6kQnVKccDuoM1XO0eYRiTaKI7K5u3MRrd9hD3OA4TQwm/vkaDQkHlVrAQ==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-19T08:56:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qPiwo5tbQjoNHo5DZMIPhT+b7AS59Mh1ezYCtglMKaXPBsgArKIjPmdWlgxNZhUGdpsYnYh/LNHB1wg7FXmCDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T06:42:21.547622Z"},"content_sha256":"b68424edb680b8fcc4eda92caf56956c29d18b0f308192c4bc4401a78b0f1076","schema_version":"1.0","event_id":"sha256:b68424edb680b8fcc4eda92caf56956c29d18b0f308192c4bc4401a78b0f1076"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6/bundle.json","state_url":"https://pith.science/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T06:42:21Z","links":{"resolver":"https://pith.science/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6","bundle":"https://pith.science/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6/bundle.json","state":"https://pith.science/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SMUSK7IXTVJTAMSNRIWZLWYFP6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SMUSK7IXTVJTAMSNRIWZLWYFP6","merge_version":"pith-open-graph-merge-v1","event_count":4,"valid_event_count":4,"invalid_event_count":0,"equivocation_count":1,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a387993af524cff36faabb839a651d8c05bc86143c44b1cce94dc3d3dcf8da7a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-11T17:35:10Z","title_canon_sha256":"520b3966640d7ace8afa9a7ea8c97592ce5ac4545ecd0961391f3fd8adb4001e"},"schema_version":"1.0","source":{"id":"2605.10893","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.10893","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"arxiv_version","alias_value":"2605.10893v2","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.10893","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"pith_short_12","alias_value":"SMUSK7IXTVJT","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"pith_short_16","alias_value":"SMUSK7IXTVJTAMSN","created_at":"2026-05-20T00:00:42Z"},{"alias_kind":"pith_short_8","alias_value":"SMUSK7IX","created_at":"2026-05-20T00:00:42Z"}],"graph_snapshots":[{"event_id":"sha256:92d7878286b9296092fb53062d0fcf7dc084255ec77897f60cf75b66549173a7","target":"graph","created_at":"2026-05-20T00:00:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"BICR achieves the best cross-LVLM average on both calibration and discrimination simultaneously, with statistically significant discrimination gains robust to cluster-aware analysis at 4-18x fewer parameters than the strongest probing baseline."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That penalizing higher confidence on the blacked-out image view via ranking loss on hidden states will cause the probe to reliably treat the presence of visual information as a signal of prediction reliability (abstract, paragraph describing the training objective)."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"BICR trains a lightweight probe on contrastive hidden states from real versus blind images to detect visual grounding in LVLM predictions, outperforming baselines on calibration and discrimination with fewer parameters."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Training probes to prefer real images over blacked-out ones lets them detect when vision-language models actually use visual input for their answers."}],"snapshot_sha256":"ba5dc5b77126998e32c2d3d816ee0c971f296ebdaf023a2c291fac1e7106a93a"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"1e993af0af2940dea1ffc1168998a42163bb6129ac79bbbbccf3f4abd552d79b"},"integrity":{"available":true,"clean":false,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T14:33:45.379386Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T10:31:17.335542Z","status":"completed","version":"1.0.0"},{"findings_count":2,"name":"doi_compliance","ran_at":"2026-05-19T08:55:11.671278Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.10893/integrity.json","findings":[{"audited_at":"2026-05-19T08:55:11.671278Z","detected_arxiv_id":null,"detected_doi":"10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11","detector":"doi_compliance","finding_type":"recoverable_identifier","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-acl.99.URLhttps://aclanthology.org/2025.findings-acl.99/.11) was visible in the surrounding text but could not be confirmed against doi.org as printed.","ref_index":3,"severity":"advisory","verdict_class":"incontrovertible"},{"audited_at":"2026-05-19T08:55:11.671278Z","detected_arxiv_id":null,"detected_doi":"10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/","detector":"doi_compliance","finding_type":"recoverable_identifier","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.emnlp-main.73.URLhttps://aclanthology.org/2025.emnlp-main.73/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","ref_index":12,"severity":"advisory","verdict_class":"incontrovertible"}],"snapshot_sha256":"68fae0da717fbc8be76998cfc655f9f5b76fdcd95b1306042502a689b16d15b9","summary":{"advisory":2,"by_detector":{"doi_compliance":{"advisory":2,"critical":0,"informational":0,"total":2}},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large vision-language models suffer from visual ungroundedness: they can produce a fluent, confident, and even correct response driven entirely by language priors, with the image contributing nothing to the prediction. Existing confidence estimation methods cannot detect this, as they observe model behavior under normal inference with no mechanism to determine whether a prediction was shaped by the image or by text alone. We introduce BICR (Blind-Image Contrastive Ranking), a model-agnostic confidence estimation framework that makes this contrast explicit during training by extracting hidden s","authors_text":"Charese H. Smiley, Erfan Miahi, Ivan Brugere, Kundan Thind, Mohammad M. Ghassemi, Reza Khanmohammadi, Simerjot Kaur","cross_cats":[],"headline":"Training probes to prefer real images over blacked-out ones lets them detect when vision-language models actually use visual input for their answers.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-11T17:35:10Z","title":"Grounded or Guessing? LVLM Confidence Estimation via Blind-Image Contrastive Ranking"},"references":{"count":61,"internal_anchors":11,"resolved_work":61,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Mitigating hallucination in large vision- language models via modular attribution and intervention","work_id":"c346e8c3-51df-4829-a1aa-13fe41858f17","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Don’t miss the forest for the trees: Attentional vision calibration for large vision language models","work_id":"60c01516-b90a-4dea-a2ff-215896e2de46","year":2025},{"cited_arxiv_id":"","doi":"10.18653/v1/","is_internal_anchor":false,"ref_index":3,"title":"doi: 10.18653/v1/ 2024.findings-acl.586","work_id":"8d675bdd-79ca-48d6-9163-fc17ce0e8ece","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Hidden in plain sight: VLMs overlook their visual representations","work_id":"05ae64ec-2694-4d04-9002-80813085820a","year":2025},{"cited_arxiv_id":"","doi":"10.18653/v1/2024.findings-emnlp.262","is_internal_anchor":false,"ref_index":5,"title":"Reference- free hallucination detection for large vision-language models","work_id":"9851845e-05e5-4691-84d9-fa8f4df8d535","year":2024}],"snapshot_sha256":"a853f4e18b2de02769827e59c3b689e072c5f67338321bba1cd926b4159d93ea"},"source":{"id":"2605.10893","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-19T17:04:36.427796Z","id":"0dd4474e-0031-4bdc-9e24-459b55e2c4ad","model_set":{"reader":"grok-4.3"},"one_line_summary":"BICR trains a lightweight probe on contrastive hidden states from real versus blind images to detect visual grounding in LVLM predictions, outperforming baselines on calibration and discrimination with fewer parameters.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Training probes to prefer real images over blacked-out ones lets them detect when vision-language models actually use visual input for their answers.","strongest_claim":"BICR achieves the best cross-LVLM average on both calibration and discrimination simultaneously, with statistically significant discrimination gains robust to cluster-aware analysis at 4-18x fewer parameters than the strongest probing baseline.","weakest_assumption":"That penalizing higher confidence on the blacked-out image view via ranking loss on hidden states will cause the probe to reliably treat the presence of visual information as a signal of prediction reliability (abstract, paragraph describing the training objective)."}},"verdict_id":"0dd4474e-0031-4bdc-9e24-459b55e2c4ad"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:54b6b9125ab08ab17ae4e4d04b31b4d37a3857cc6534055dfc9048e29845c39f","target":"record","created_at":"2026-05-20T00:00:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a387993af524cff36faabb839a651d8c05bc86143c44b1cce94dc3d3dcf8da7a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-11T17:35:10Z","title_canon_sha256":"520b3966640d7ace8afa9a7ea8c97592ce5ac4545ecd0961391f3fd8adb4001e"},"schema_version":"1.0","source":{"id":"2605.10893","kind":"arxiv","version":2}},"canonical_sha256":"9329257d179d5330324d8a2d95db057fb2ac517f81a169b7324ac6e87e36a8c8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9329257d179d5330324d8a2d95db057fb2ac517f81a169b7324ac6e87e36a8c8","first_computed_at":"2026-05-20T00:00:42.569712Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:42.569712Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"o9kNqVPLg92MkjJTbYW8ISpnjmuWdoB6xsXqaa3oXuEmnRpPleNQfc4MpiwCFdcPOJDILpoDtjBkC9HrqUYgBA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:42.570530Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.10893","source_kind":"arxiv","source_version":2}}},"equivocations":[{"signer_id":"pith.science","event_type":"integrity_finding","target":"integrity","event_ids":["sha256:67ed4dc696e2cc029cf9d1c3979af11d5da336cee4bb239de3d6fbe7e02043cb","sha256:b68424edb680b8fcc4eda92caf56956c29d18b0f308192c4bc4401a78b0f1076"]}],"invalid_events":[],"applied_event_ids":["sha256:54b6b9125ab08ab17ae4e4d04b31b4d37a3857cc6534055dfc9048e29845c39f","sha256:92d7878286b9296092fb53062d0fcf7dc084255ec77897f60cf75b66549173a7"],"state_sha256":"576d24d1fdf9d2b55c99764d57024c08a227b2cae604c0c45652d33663e752f0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nYtln0RPzBntla/oY2oggLR1P4RmjEKngNlT357MelmwXt8XOfCNjKWIpQRowoKKHTwxkiAv77TJmfl/Y5fDDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T06:42:21.550829Z","bundle_sha256":"585d4e5833137a9866c9e5ee8610e4d8a233c2b5ac1847ba470b3e9a38ca2f68"}}