{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:43CUU3VYS7C5MGOQSTJSB4QHJB","short_pith_number":"pith:43CUU3VY","canonical_record":{"source":{"id":"2606.02642","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-31T14:00:37Z","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","cs.MM","cs.SD"],"title_canon_sha256":"c42b792a1e4006479a5deec71bbbbf3653e1feea5153e46034ea01e6e82451a6","abstract_canon_sha256":"c33780a069ed39a406fd3d5fa37bfca4ce394d2b0c4e0e8a2e7dcacac63fc1b6"},"schema_version":"1.0"},"canonical_sha256":"e6c54a6eb897c5d619d094d320f2074872bb744da7396e029545ec74592a52e0","source":{"kind":"arxiv","id":"2606.02642","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.02642","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"arxiv_version","alias_value":"2606.02642v1","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02642","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"pith_short_12","alias_value":"43CUU3VYS7C5","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"pith_short_16","alias_value":"43CUU3VYS7C5MGOQ","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"pith_short_8","alias_value":"43CUU3VY","created_at":"2026-06-03T00:05:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:43CUU3VYS7C5MGOQSTJSB4QHJB","target":"record","payload":{"canonical_record":{"source":{"id":"2606.02642","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-31T14:00:37Z","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","cs.MM","cs.SD"],"title_canon_sha256":"c42b792a1e4006479a5deec71bbbbf3653e1feea5153e46034ea01e6e82451a6","abstract_canon_sha256":"c33780a069ed39a406fd3d5fa37bfca4ce394d2b0c4e0e8a2e7dcacac63fc1b6"},"schema_version":"1.0"},"canonical_sha256":"e6c54a6eb897c5d619d094d320f2074872bb744da7396e029545ec74592a52e0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T00:05:05.430867Z","signature_b64":"czn1mlRiR35Y3iFDv50n95y6YuhbIGi5hgU6+X+iSovvy8fHIWwoS786/C4K98QghEqEuKKrALPJy364YjRkBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e6c54a6eb897c5d619d094d320f2074872bb744da7396e029545ec74592a52e0","last_reissued_at":"2026-06-03T00:05:05.430456Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T00:05:05.430456Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.02642","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T00:05:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"itoQUQ8raJCcXmwgjC7c4I4apXP4zfuKlLHztGJCyOUYbTpMFx0QcnDtlEvGGjolU7p0PTxZ8ZVm1ult9xD5Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T09:22:01.608806Z"},"content_sha256":"2ff501fd71bb64c92a04db022dc1627dc1eee926e7c59f59a62d305c243fc90a","schema_version":"1.0","event_id":"sha256:2ff501fd71bb64c92a04db022dc1627dc1eee926e7c59f59a62d305c243fc90a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:43CUU3VYS7C5MGOQSTJSB4QHJB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SVHalluc: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.LG","cs.MM","cs.SD"],"primary_cat":"eess.AS","authors_text":"Chengxin Liu, Chenshuang Zhang, Kyeong Seon Kim, Tae-Hyun Oh","submitted_at":"2026-05-31T14:00:37Z","abstract_excerpt":"Despite the success of audio-visual large-language models (LLMs), they can produce plausible but ungrounded outputs, termed hallucination. Existing benchmarks focus on environmental sounds (e.g., dog barking) to indicate event occurrence. In contrast, human speech carries fundamentally different, rich semantics and temporal structures, yet it remains unexplored whether current models can accurately align speech content with corresponding visual signals. In this work, we show that speech content can induce hallucinations in audio-visual LLMs. To systematically study this, we introduce SVHalluc,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02642","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.02642/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T00:05:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tkpaf2PXvUwUyI3k7MhecwwuYgkEMnGOaB7qtZk6RcksfeHnyK1C0PmsuFfF+BFQ2RS3jQ1taxDGi9RUAdhzDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T09:22:01.609193Z"},"content_sha256":"b3f9be7e857b220893bc0c756730083a8cb12b99eccd7a01597c0e7b4bcb6430","schema_version":"1.0","event_id":"sha256:b3f9be7e857b220893bc0c756730083a8cb12b99eccd7a01597c0e7b4bcb6430"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/43CUU3VYS7C5MGOQSTJSB4QHJB/bundle.json","state_url":"https://pith.science/pith/43CUU3VYS7C5MGOQSTJSB4QHJB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/43CUU3VYS7C5MGOQSTJSB4QHJB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T09:22:01Z","links":{"resolver":"https://pith.science/pith/43CUU3VYS7C5MGOQSTJSB4QHJB","bundle":"https://pith.science/pith/43CUU3VYS7C5MGOQSTJSB4QHJB/bundle.json","state":"https://pith.science/pith/43CUU3VYS7C5MGOQSTJSB4QHJB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/43CUU3VYS7C5MGOQSTJSB4QHJB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:43CUU3VYS7C5MGOQSTJSB4QHJB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c33780a069ed39a406fd3d5fa37bfca4ce394d2b0c4e0e8a2e7dcacac63fc1b6","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","cs.MM","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-31T14:00:37Z","title_canon_sha256":"c42b792a1e4006479a5deec71bbbbf3653e1feea5153e46034ea01e6e82451a6"},"schema_version":"1.0","source":{"id":"2606.02642","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.02642","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"arxiv_version","alias_value":"2606.02642v1","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02642","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"pith_short_12","alias_value":"43CUU3VYS7C5","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"pith_short_16","alias_value":"43CUU3VYS7C5MGOQ","created_at":"2026-06-03T00:05:05Z"},{"alias_kind":"pith_short_8","alias_value":"43CUU3VY","created_at":"2026-06-03T00:05:05Z"}],"graph_snapshots":[{"event_id":"sha256:b3f9be7e857b220893bc0c756730083a8cb12b99eccd7a01597c0e7b4bcb6430","target":"graph","created_at":"2026-06-03T00:05:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.02642/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Despite the success of audio-visual large-language models (LLMs), they can produce plausible but ungrounded outputs, termed hallucination. Existing benchmarks focus on environmental sounds (e.g., dog barking) to indicate event occurrence. In contrast, human speech carries fundamentally different, rich semantics and temporal structures, yet it remains unexplored whether current models can accurately align speech content with corresponding visual signals. In this work, we show that speech content can induce hallucinations in audio-visual LLMs. To systematically study this, we introduce SVHalluc,","authors_text":"Chengxin Liu, Chenshuang Zhang, Kyeong Seon Kim, Tae-Hyun Oh","cross_cats":["cs.AI","cs.CV","cs.LG","cs.MM","cs.SD"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-31T14:00:37Z","title":"SVHalluc: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02642","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2ff501fd71bb64c92a04db022dc1627dc1eee926e7c59f59a62d305c243fc90a","target":"record","created_at":"2026-06-03T00:05:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c33780a069ed39a406fd3d5fa37bfca4ce394d2b0c4e0e8a2e7dcacac63fc1b6","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","cs.MM","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-31T14:00:37Z","title_canon_sha256":"c42b792a1e4006479a5deec71bbbbf3653e1feea5153e46034ea01e6e82451a6"},"schema_version":"1.0","source":{"id":"2606.02642","kind":"arxiv","version":1}},"canonical_sha256":"e6c54a6eb897c5d619d094d320f2074872bb744da7396e029545ec74592a52e0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e6c54a6eb897c5d619d094d320f2074872bb744da7396e029545ec74592a52e0","first_computed_at":"2026-06-03T00:05:05.430456Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T00:05:05.430456Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"czn1mlRiR35Y3iFDv50n95y6YuhbIGi5hgU6+X+iSovvy8fHIWwoS786/C4K98QghEqEuKKrALPJy364YjRkBw==","signature_status":"signed_v1","signed_at":"2026-06-03T00:05:05.430867Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.02642","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2ff501fd71bb64c92a04db022dc1627dc1eee926e7c59f59a62d305c243fc90a","sha256:b3f9be7e857b220893bc0c756730083a8cb12b99eccd7a01597c0e7b4bcb6430"],"state_sha256":"7ffa1641a1ac6ecd5442352cab17895ddbf7a962104403f150a6492be2a651cf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SaA5D8goRS/PdCqa5/twFqhNIC8TulGF+ppb59mmtcR7CEiYcyamqKzGOEg+Ny8p7o28yERGYgG//P8lWypSBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T09:22:01.611232Z","bundle_sha256":"d2dabbdf671bf873c8e11458fcb5d4b75a18b6ac8229d4d453d91e58ba85d139"}}