{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:NOPIGFO3Z633YKBFGD3L2Y72HY","short_pith_number":"pith:NOPIGFO3","canonical_record":{"source":{"id":"2402.18400","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2024-02-28T15:19:59Z","cross_cats_sorted":[],"title_canon_sha256":"251bf5956aaa966de502d6a6bbcc9dbc22c1ec6328a072e12d1496fc2159bf6b","abstract_canon_sha256":"c836620eb278ab607251925355f1b7707c8bc81153c30e46dfb975b9451164df"},"schema_version":"1.0"},"canonical_sha256":"6b9e8315dbcfb7bc282530f6bd63fa3e1809adc94f3638715c14582cd739459a","source":{"kind":"arxiv","id":"2402.18400","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.18400","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"arxiv_version","alias_value":"2402.18400v2","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.18400","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"pith_short_12","alias_value":"NOPIGFO3Z633","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"pith_short_16","alias_value":"NOPIGFO3Z633YKBF","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"pith_short_8","alias_value":"NOPIGFO3","created_at":"2026-07-05T08:37:17Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:NOPIGFO3Z633YKBFGD3L2Y72HY","target":"record","payload":{"canonical_record":{"source":{"id":"2402.18400","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2024-02-28T15:19:59Z","cross_cats_sorted":[],"title_canon_sha256":"251bf5956aaa966de502d6a6bbcc9dbc22c1ec6328a072e12d1496fc2159bf6b","abstract_canon_sha256":"c836620eb278ab607251925355f1b7707c8bc81153c30e46dfb975b9451164df"},"schema_version":"1.0"},"canonical_sha256":"6b9e8315dbcfb7bc282530f6bd63fa3e1809adc94f3638715c14582cd739459a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T08:37:17.977107Z","signature_b64":"Pgqq1vWC4jFa3GkzESUVF1nReBfqX6KCMIBs8Dg6jnhapov0ewRmRY5ahOPVsqHQoyqnJckmNnQInRiFZ7hkCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6b9e8315dbcfb7bc282530f6bd63fa3e1809adc94f3638715c14582cd739459a","last_reissued_at":"2026-07-05T08:37:17.976570Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T08:37:17.976570Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2402.18400","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:37:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4ZDnPx7TO+3bZOexW5LtsuzTyzLpnbcwH7bU2fG0Po7ju/0kY3BZFS/3KLg55p7VW8NFB0xcVSxfJa6RJ39ACw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T13:54:28.270667Z"},"content_sha256":"41f0636d019a248c6d2e83ac7223c24c74ef880c81682c3b31ed77e981c1e976","schema_version":"1.0","event_id":"sha256:41f0636d019a248c6d2e83ac7223c24c74ef880c81682c3b31ed77e981c1e976"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:NOPIGFO3Z633YKBFGD3L2Y72HY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards Alleviating Text-to-Image Retrieval Hallucination for CLIP in Zero-shot Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.MM","authors_text":"Hanyao Wang, Jun Yu, Liang Ding, Liu Liu, Yan Yang, Yibing Zhan","submitted_at":"2024-02-28T15:19:59Z","abstract_excerpt":"Pretrained cross-modal models, for instance, the most representative CLIP, have recently led to a boom in using pre-trained models for cross-modal zero-shot tasks, considering the generalization properties. However, we analytically discover that CLIP suffers from the text-to-image retrieval hallucination, adversely limiting its capabilities under zero-shot learning: CLIP would select the image with the highest score when asked to figure out which image perfectly matches one given query text among several candidate images even though CLIP knows contents in the image. Accordingly, we propose a B"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2402.18400","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2402.18400/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:37:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"V/6rajaW+VCpY8JSVms/OyPrpFHswkA6kwQST5+jqRXp2hiATnY7qSmjzflxRD2IG/zODMRmDQoJTh/e1dGKBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T13:54:28.271297Z"},"content_sha256":"49c7124edfab3834268d2dd92589c40948a9c7dba618a37c919e03570a007f8e","schema_version":"1.0","event_id":"sha256:49c7124edfab3834268d2dd92589c40948a9c7dba618a37c919e03570a007f8e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NOPIGFO3Z633YKBFGD3L2Y72HY/bundle.json","state_url":"https://pith.science/pith/NOPIGFO3Z633YKBFGD3L2Y72HY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NOPIGFO3Z633YKBFGD3L2Y72HY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T13:54:28Z","links":{"resolver":"https://pith.science/pith/NOPIGFO3Z633YKBFGD3L2Y72HY","bundle":"https://pith.science/pith/NOPIGFO3Z633YKBFGD3L2Y72HY/bundle.json","state":"https://pith.science/pith/NOPIGFO3Z633YKBFGD3L2Y72HY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NOPIGFO3Z633YKBFGD3L2Y72HY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:NOPIGFO3Z633YKBFGD3L2Y72HY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c836620eb278ab607251925355f1b7707c8bc81153c30e46dfb975b9451164df","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2024-02-28T15:19:59Z","title_canon_sha256":"251bf5956aaa966de502d6a6bbcc9dbc22c1ec6328a072e12d1496fc2159bf6b"},"schema_version":"1.0","source":{"id":"2402.18400","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.18400","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"arxiv_version","alias_value":"2402.18400v2","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.18400","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"pith_short_12","alias_value":"NOPIGFO3Z633","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"pith_short_16","alias_value":"NOPIGFO3Z633YKBF","created_at":"2026-07-05T08:37:17Z"},{"alias_kind":"pith_short_8","alias_value":"NOPIGFO3","created_at":"2026-07-05T08:37:17Z"}],"graph_snapshots":[{"event_id":"sha256:49c7124edfab3834268d2dd92589c40948a9c7dba618a37c919e03570a007f8e","target":"graph","created_at":"2026-07-05T08:37:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2402.18400/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pretrained cross-modal models, for instance, the most representative CLIP, have recently led to a boom in using pre-trained models for cross-modal zero-shot tasks, considering the generalization properties. However, we analytically discover that CLIP suffers from the text-to-image retrieval hallucination, adversely limiting its capabilities under zero-shot learning: CLIP would select the image with the highest score when asked to figure out which image perfectly matches one given query text among several candidate images even though CLIP knows contents in the image. Accordingly, we propose a B","authors_text":"Hanyao Wang, Jun Yu, Liang Ding, Liu Liu, Yan Yang, Yibing Zhan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2024-02-28T15:19:59Z","title":"Towards Alleviating Text-to-Image Retrieval Hallucination for CLIP in Zero-shot Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2402.18400","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:41f0636d019a248c6d2e83ac7223c24c74ef880c81682c3b31ed77e981c1e976","target":"record","created_at":"2026-07-05T08:37:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c836620eb278ab607251925355f1b7707c8bc81153c30e46dfb975b9451164df","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2024-02-28T15:19:59Z","title_canon_sha256":"251bf5956aaa966de502d6a6bbcc9dbc22c1ec6328a072e12d1496fc2159bf6b"},"schema_version":"1.0","source":{"id":"2402.18400","kind":"arxiv","version":2}},"canonical_sha256":"6b9e8315dbcfb7bc282530f6bd63fa3e1809adc94f3638715c14582cd739459a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6b9e8315dbcfb7bc282530f6bd63fa3e1809adc94f3638715c14582cd739459a","first_computed_at":"2026-07-05T08:37:17.976570Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T08:37:17.976570Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Pgqq1vWC4jFa3GkzESUVF1nReBfqX6KCMIBs8Dg6jnhapov0ewRmRY5ahOPVsqHQoyqnJckmNnQInRiFZ7hkCA==","signature_status":"signed_v1","signed_at":"2026-07-05T08:37:17.977107Z","signed_message":"canonical_sha256_bytes"},"source_id":"2402.18400","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:41f0636d019a248c6d2e83ac7223c24c74ef880c81682c3b31ed77e981c1e976","sha256:49c7124edfab3834268d2dd92589c40948a9c7dba618a37c919e03570a007f8e"],"state_sha256":"c456529c29b966117b852950ffab17327625c4b9f60ba82045e5cc2dac110a8d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8SZ4oYSSD8B8Pv4Mj2RxDP1v5nEJO6FPyrqcygdPXRGp+rns8jWzUhGmPb2DbXXu4i7Idsn+hLIRRnEljqd/AA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T13:54:28.274359Z","bundle_sha256":"8432c42f013a72041b25cc3d84be4f411110b2e63bca4fa9e520bc01e8c0c934"}}