{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:M6T2WLTQJO6K4LA6SP3IBXMGUC","short_pith_number":"pith:M6T2WLTQ","canonical_record":{"source":{"id":"2604.02784","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-03T06:48:27Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"699cb3718481908c167998b5c43a73505704b40e97d434ac6fc3880162b41adf","abstract_canon_sha256":"738c55bb07479d179d1811524fbd914f228950bf16dadee89e4261c309996b93"},"schema_version":"1.0"},"canonical_sha256":"67a7ab2e704bbcae2c1e93f680dd86a09558056bd61b5ddb2dc381396fca99b3","source":{"kind":"arxiv","id":"2604.02784","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.02784","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"arxiv_version","alias_value":"2604.02784v2","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02784","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"pith_short_12","alias_value":"M6T2WLTQJO6K","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"pith_short_16","alias_value":"M6T2WLTQJO6K4LA6","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"pith_short_8","alias_value":"M6T2WLTQ","created_at":"2026-05-20T01:05:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:M6T2WLTQJO6K4LA6SP3IBXMGUC","target":"record","payload":{"canonical_record":{"source":{"id":"2604.02784","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-03T06:48:27Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"699cb3718481908c167998b5c43a73505704b40e97d434ac6fc3880162b41adf","abstract_canon_sha256":"738c55bb07479d179d1811524fbd914f228950bf16dadee89e4261c309996b93"},"schema_version":"1.0"},"canonical_sha256":"67a7ab2e704bbcae2c1e93f680dd86a09558056bd61b5ddb2dc381396fca99b3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:12.454200Z","signature_b64":"Ym1qrTdA/jLVDkzWL6n6HsYvVUJntc5qSCAUtm1iqOUdl1lCUR7BLB9TLys/+UkV4C7nXYGZetup9qC39tP+Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"67a7ab2e704bbcae2c1e93f680dd86a09558056bd61b5ddb2dc381396fca99b3","last_reissued_at":"2026-05-20T01:05:12.453408Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:12.453408Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.02784","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OnShisi+8ret5WR2EAgnDULNnQjAun+ZKNdp/Vvf+zSFZDnbjDOIp6ObnJLXAljUi3jaYa69NtwfdFsaM5fzBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T05:38:45.379752Z"},"content_sha256":"bb0f95e3270373878ffbeb30b2c8fcd68696e5557dff41231de71475d7d28295","schema_version":"1.0","event_id":"sha256:bb0f95e3270373878ffbeb30b2c8fcd68696e5557dff41231de71475d7d28295"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:M6T2WLTQJO6K4LA6SP3IBXMGUC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"EnsemHalDet: Robust VLM Hallucination Detection via Ensemble of Internal State Detectors","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"EnsemHalDet improves hallucination detection in vision-language models by ensembling detectors from multiple internal states.","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Kei Harada, Ryuhei Miyazato, Shunsuke Kitada","submitted_at":"2026-04-03T06:48:27Z","abstract_excerpt":"Vision-Language Models (VLMs) excel at multimodal tasks, but they remain vulnerable to hallucinations that are factually incorrect or ungrounded in the input image. Recent work suggests that hallucination detection using internal representations is more efficient and accurate than approaches that rely solely on model outputs. However, existing internal-representation-based methods typically rely on a single representation or detector, limiting their ability to capture diverse hallucination signals. In this paper, we propose EnsemHalDet, an ensemble-based hallucination detection framework that "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"EnsemHalDet consistently outperforms prior methods and single-detector models in terms of AUC across multiple VQA datasets and VLMs.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the chosen internal representations (attention outputs and hidden states) supply sufficiently diverse and complementary hallucination signals that can be combined effectively by ensemble learning.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"EnsemHalDet improves hallucination detection in VLMs by ensembling independent detectors on diverse internal states, yielding higher AUC than single-detector baselines on VQA datasets.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"EnsemHalDet improves hallucination detection in vision-language models by ensembling detectors from multiple internal states.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"0a85595d0381d470cd9ec9f1e09a61cc2a5b6e6bd7b0f94606e8401a5242264d"},"source":{"id":"2604.02784","kind":"arxiv","version":2},"verdict":{"id":"30b34599-c9c6-4c77-b55c-40ea78eecf9c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-13T19:57:09.066792Z","strongest_claim":"EnsemHalDet consistently outperforms prior methods and single-detector models in terms of AUC across multiple VQA datasets and VLMs.","one_line_summary":"EnsemHalDet improves hallucination detection in VLMs by ensembling independent detectors on diverse internal states, yielding higher AUC than single-detector baselines on VQA datasets.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the chosen internal representations (attention outputs and hidden states) supply sufficiently diverse and complementary hallucination signals that can be combined effectively by ensemble learning.","pith_extraction_headline":"EnsemHalDet improves hallucination detection in vision-language models by ensembling detectors from multiple internal states."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.02784/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"30b34599-c9c6-4c77-b55c-40ea78eecf9c"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"umGEbb3iQyiuivzUepb7VYtLiV0kEfmHohglraEqobRII8gmgAc05RdRlZaXx4j2sqZF9wrfwp0fDD1sXatTBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T05:38:45.380275Z"},"content_sha256":"37af7609d48250d351514e91ac03957e6faaba7af10c0ceb348f8496be79f2c8","schema_version":"1.0","event_id":"sha256:37af7609d48250d351514e91ac03957e6faaba7af10c0ceb348f8496be79f2c8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC/bundle.json","state_url":"https://pith.science/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T05:38:45Z","links":{"resolver":"https://pith.science/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC","bundle":"https://pith.science/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC/bundle.json","state":"https://pith.science/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/M6T2WLTQJO6K4LA6SP3IBXMGUC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:M6T2WLTQJO6K4LA6SP3IBXMGUC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"738c55bb07479d179d1811524fbd914f228950bf16dadee89e4261c309996b93","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-03T06:48:27Z","title_canon_sha256":"699cb3718481908c167998b5c43a73505704b40e97d434ac6fc3880162b41adf"},"schema_version":"1.0","source":{"id":"2604.02784","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.02784","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"arxiv_version","alias_value":"2604.02784v2","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02784","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"pith_short_12","alias_value":"M6T2WLTQJO6K","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"pith_short_16","alias_value":"M6T2WLTQJO6K4LA6","created_at":"2026-05-20T01:05:12Z"},{"alias_kind":"pith_short_8","alias_value":"M6T2WLTQ","created_at":"2026-05-20T01:05:12Z"}],"graph_snapshots":[{"event_id":"sha256:37af7609d48250d351514e91ac03957e6faaba7af10c0ceb348f8496be79f2c8","target":"graph","created_at":"2026-05-20T01:05:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"EnsemHalDet consistently outperforms prior methods and single-detector models in terms of AUC across multiple VQA datasets and VLMs."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the chosen internal representations (attention outputs and hidden states) supply sufficiently diverse and complementary hallucination signals that can be combined effectively by ensemble learning."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"EnsemHalDet improves hallucination detection in VLMs by ensembling independent detectors on diverse internal states, yielding higher AUC than single-detector baselines on VQA datasets."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"EnsemHalDet improves hallucination detection in vision-language models by ensembling detectors from multiple internal states."}],"snapshot_sha256":"0a85595d0381d470cd9ec9f1e09a61cc2a5b6e6bd7b0f94606e8401a5242264d"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.02784/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Vision-Language Models (VLMs) excel at multimodal tasks, but they remain vulnerable to hallucinations that are factually incorrect or ungrounded in the input image. Recent work suggests that hallucination detection using internal representations is more efficient and accurate than approaches that rely solely on model outputs. However, existing internal-representation-based methods typically rely on a single representation or detector, limiting their ability to capture diverse hallucination signals. In this paper, we propose EnsemHalDet, an ensemble-based hallucination detection framework that ","authors_text":"Kei Harada, Ryuhei Miyazato, Shunsuke Kitada","cross_cats":["cs.CL"],"headline":"EnsemHalDet improves hallucination detection in vision-language models by ensembling detectors from multiple internal states.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-03T06:48:27Z","title":"EnsemHalDet: Robust VLM Hallucination Detection via Ensemble of Internal State Detectors"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.02784","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-13T19:57:09.066792Z","id":"30b34599-c9c6-4c77-b55c-40ea78eecf9c","model_set":{"reader":"grok-4.3"},"one_line_summary":"EnsemHalDet improves hallucination detection in VLMs by ensembling independent detectors on diverse internal states, yielding higher AUC than single-detector baselines on VQA datasets.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"EnsemHalDet improves hallucination detection in vision-language models by ensembling detectors from multiple internal states.","strongest_claim":"EnsemHalDet consistently outperforms prior methods and single-detector models in terms of AUC across multiple VQA datasets and VLMs.","weakest_assumption":"That the chosen internal representations (attention outputs and hidden states) supply sufficiently diverse and complementary hallucination signals that can be combined effectively by ensemble learning."}},"verdict_id":"30b34599-c9c6-4c77-b55c-40ea78eecf9c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bb0f95e3270373878ffbeb30b2c8fcd68696e5557dff41231de71475d7d28295","target":"record","created_at":"2026-05-20T01:05:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"738c55bb07479d179d1811524fbd914f228950bf16dadee89e4261c309996b93","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-03T06:48:27Z","title_canon_sha256":"699cb3718481908c167998b5c43a73505704b40e97d434ac6fc3880162b41adf"},"schema_version":"1.0","source":{"id":"2604.02784","kind":"arxiv","version":2}},"canonical_sha256":"67a7ab2e704bbcae2c1e93f680dd86a09558056bd61b5ddb2dc381396fca99b3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"67a7ab2e704bbcae2c1e93f680dd86a09558056bd61b5ddb2dc381396fca99b3","first_computed_at":"2026-05-20T01:05:12.453408Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:12.453408Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Ym1qrTdA/jLVDkzWL6n6HsYvVUJntc5qSCAUtm1iqOUdl1lCUR7BLB9TLys/+UkV4C7nXYGZetup9qC39tP+Bg==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:12.454200Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.02784","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bb0f95e3270373878ffbeb30b2c8fcd68696e5557dff41231de71475d7d28295","sha256:37af7609d48250d351514e91ac03957e6faaba7af10c0ceb348f8496be79f2c8"],"state_sha256":"bba9d4be1f9cec745e83805f92c488dddcf08f8d658939f91d8f4cbd65da1130"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EJ5pZZTpSM7Xlon/bTi/WIWVYcJ5gcy+cNsQBckvHdNWn+udQYF/COyFISSeXxN4KMhKiHLxT//L/ofImZD5Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T05:38:45.383566Z","bundle_sha256":"bbe56004128ef7e6ab54d0380f9f64168619a5761e456af96444b37e8270ee7f"}}