{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QIJS7POPXR6RYWPL2KUE7Y3VYC","short_pith_number":"pith:QIJS7POP","canonical_record":{"source":{"id":"2605.13277","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","cross_cats_sorted":["cs.AI","cs.CV","cs.IR","cs.LG"],"title_canon_sha256":"9274f354ba30343f0b53f3f8d3261e368967506e66d00baef791b534caabfefa","abstract_canon_sha256":"a0eb75b8a0c660ed2a4f81960274c6969fbfa096c82856ae7542a61892f5e7d0"},"schema_version":"1.0"},"canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","source":{"kind":"arxiv","id":"2605.13277","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13277","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13277v1","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13277","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"pith_short_12","alias_value":"QIJS7POPXR6R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QIJS7POPXR6RYWPL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QIJS7POP","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QIJS7POPXR6RYWPL2KUE7Y3VYC","target":"record","payload":{"canonical_record":{"source":{"id":"2605.13277","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","cross_cats_sorted":["cs.AI","cs.CV","cs.IR","cs.LG"],"title_canon_sha256":"9274f354ba30343f0b53f3f8d3261e368967506e66d00baef791b534caabfefa","abstract_canon_sha256":"a0eb75b8a0c660ed2a4f81960274c6969fbfa096c82856ae7542a61892f5e7d0"},"schema_version":"1.0"},"canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:49.229283Z","signature_b64":"9ivqBOo/Hszp01f7pKiKn3UcoEo1bGmRz1ia9RDJ48C/OK/LFLAM7BgHYTr80eqoC6Y7/BraZHjnNO7er9+gCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","last_reissued_at":"2026-05-18T02:44:49.228866Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:49.228866Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.13277","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:44:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9RwGqUmYOE4EOL5OqbQi/zJa6Le4ey0tzYB2fWHGvyJyAFh/vgQtV0CYV1SumAk+8DcoMv21fQu80gwluUMeCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T15:20:06.615573Z"},"content_sha256":"9b97ed36ebc82f17b7bae13445181045d6ebf3519311dfe3e25e434523caa16e","schema_version":"1.0","event_id":"sha256:9b97ed36ebc82f17b7bae13445181045d6ebf3519311dfe3e25e434523caa16e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QIJS7POPXR6RYWPL2KUE7Y3VYC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Utility-Oriented Visual Evidence Selection for Multimodal Retrieval-Augmented Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG","cross_cats":["cs.AI","cs.CV","cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Haofeng Zhang, Weiqing Luo, Xiao Wang, Zhiyuan Yu, Ziyi Huang, Zongye Hu","submitted_at":"2026-05-13T09:54:31Z","abstract_excerpt":"Visual evidence selection is a critical component of multimodal retrieval-augmented generation (RAG), yet existing methods typically rely on semantic relevance or surface-level similarity, which are often misaligned with the actual utility of visual evidence for downstream reasoning. We reformulate multimodal evidence selection from an information-theoretic perspective by defining evidence utility as the information gain induced on a model's output distribution. To overcome the intractability of answer-space optimization, we introduce a latent notion of evidence helpfulness and theoretically s"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"ranking evidence by information gain on this latent variable is equivalent to answer-space utility","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"under mild assumptions","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Evidence utility is defined as information gain on the model's output distribution, with ranking by gain on a latent helpfulness variable shown equivalent to answer-space utility under mild assumptions, enabling a training-free surrogate framework that outperforms baselines.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"43252e453d2c92aee16fccff049ee089523766acbfc4bfe88e0dd91c033d5d93"},"source":{"id":"2605.13277","kind":"arxiv","version":1},"verdict":{"id":"79da5243-cbc0-4d46-955a-e9a4fcc5c9ea","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T19:06:00.400435Z","strongest_claim":"ranking evidence by information gain on this latent variable is equivalent to answer-space utility","one_line_summary":"Evidence utility is defined as information gain on the model's output distribution, with ranking by gain on a latent helpfulness variable shown equivalent to answer-space utility under mild assumptions, enabling a training-free surrogate framework that outperforms baselines.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"under mild assumptions","pith_extraction_headline":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG"},"references":{"count":43,"sample":[{"doi":"","year":null,"title":"VLM2Vec: Training Vision-Language Models for Massive Multimodal Embedding Tasks","work_id":"ec376227-cc62-4775-9e03-bed3862151d0","ref_index":1,"cited_arxiv_id":"2410.05160","is_internal_anchor":true},{"doi":"","year":null,"title":"Vlm2vec-v2: Advancing multimodal em- bedding for videos, images, and visual documents","work_id":"645b5239-8642-4bd5-a661-7ff075ceb306","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) , pages=","work_id":"f31acd76-d74b-4edb-aed0-20b9256ab082","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"E5-V: Universal Embeddings with Multimodal Large Language Models","work_id":"df2e178e-bd96-48f4-8431-51c61fbc63fd","ref_index":4,"cited_arxiv_id":"2407.12580","is_internal_anchor":true},{"doi":"","year":null,"title":"GME: Improving Universal Multimodal Retrieval by Multimodal","work_id":"a99a6453-de04-4fcb-8376-b10bf7dd68c7","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":43,"snapshot_sha256":"69ca431a7ca9f161e76ff13a3e48b127b21d43f61226233ae9f239837e85eda3","internal_anchors":11},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"79da5243-cbc0-4d46-955a-e9a4fcc5c9ea"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:44:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kZUfKXQBtfUySVJmymfR9PiPHBwZ5RrD9Iix7QE3W6ujalo+R4JMeRPXy7NP4LN0lXHLQJ0w5YW3nNeRhiiyBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T15:20:06.616324Z"},"content_sha256":"c5e44329d286823fecef0c1883639137a4b39cae43f44457f5d7d8a92ed19b84","schema_version":"1.0","event_id":"sha256:c5e44329d286823fecef0c1883639137a4b39cae43f44457f5d7d8a92ed19b84"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC/bundle.json","state_url":"https://pith.science/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T15:20:06Z","links":{"resolver":"https://pith.science/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC","bundle":"https://pith.science/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC/bundle.json","state":"https://pith.science/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QIJS7POPXR6RYWPL2KUE7Y3VYC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QIJS7POPXR6RYWPL2KUE7Y3VYC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a0eb75b8a0c660ed2a4f81960274c6969fbfa096c82856ae7542a61892f5e7d0","cross_cats_sorted":["cs.AI","cs.CV","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","title_canon_sha256":"9274f354ba30343f0b53f3f8d3261e368967506e66d00baef791b534caabfefa"},"schema_version":"1.0","source":{"id":"2605.13277","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13277","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13277v1","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13277","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"pith_short_12","alias_value":"QIJS7POPXR6R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QIJS7POPXR6RYWPL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QIJS7POP","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:c5e44329d286823fecef0c1883639137a4b39cae43f44457f5d7d8a92ed19b84","target":"graph","created_at":"2026-05-18T02:44:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ranking evidence by information gain on this latent variable is equivalent to answer-space utility"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"under mild assumptions"},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Evidence utility is defined as information gain on the model's output distribution, with ranking by gain on a latent helpfulness variable shown equivalent to answer-space utility under mild assumptions, enabling a training-free surrogate framework that outperforms baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG"}],"snapshot_sha256":"43252e453d2c92aee16fccff049ee089523766acbfc4bfe88e0dd91c033d5d93"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Visual evidence selection is a critical component of multimodal retrieval-augmented generation (RAG), yet existing methods typically rely on semantic relevance or surface-level similarity, which are often misaligned with the actual utility of visual evidence for downstream reasoning. We reformulate multimodal evidence selection from an information-theoretic perspective by defining evidence utility as the information gain induced on a model's output distribution. To overcome the intractability of answer-space optimization, we introduce a latent notion of evidence helpfulness and theoretically s","authors_text":"Haofeng Zhang, Weiqing Luo, Xiao Wang, Zhiyuan Yu, Ziyi Huang, Zongye Hu","cross_cats":["cs.AI","cs.CV","cs.IR","cs.LG"],"headline":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","title":"Utility-Oriented Visual Evidence Selection for Multimodal Retrieval-Augmented Generation"},"references":{"count":43,"internal_anchors":11,"resolved_work":43,"sample":[{"cited_arxiv_id":"2410.05160","doi":"","is_internal_anchor":true,"ref_index":1,"title":"VLM2Vec: Training Vision-Language Models for Massive Multimodal Embedding Tasks","work_id":"ec376227-cc62-4775-9e03-bed3862151d0","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Vlm2vec-v2: Advancing multimodal em- bedding for videos, images, and visual documents","work_id":"645b5239-8642-4bd5-a661-7ff075ceb306","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) , pages=","work_id":"f31acd76-d74b-4edb-aed0-20b9256ab082","year":null},{"cited_arxiv_id":"2407.12580","doi":"","is_internal_anchor":true,"ref_index":4,"title":"E5-V: Universal Embeddings with Multimodal Large Language Models","work_id":"df2e178e-bd96-48f4-8431-51c61fbc63fd","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"GME: Improving Universal Multimodal Retrieval by Multimodal","work_id":"a99a6453-de04-4fcb-8376-b10bf7dd68c7","year":null}],"snapshot_sha256":"69ca431a7ca9f161e76ff13a3e48b127b21d43f61226233ae9f239837e85eda3"},"source":{"id":"2605.13277","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T19:06:00.400435Z","id":"79da5243-cbc0-4d46-955a-e9a4fcc5c9ea","model_set":{"reader":"grok-4.3"},"one_line_summary":"Evidence utility is defined as information gain on the model's output distribution, with ranking by gain on a latent helpfulness variable shown equivalent to answer-space utility under mild assumptions, enabling a training-free surrogate framework that outperforms baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG","strongest_claim":"ranking evidence by information gain on this latent variable is equivalent to answer-space utility","weakest_assumption":"under mild assumptions"}},"verdict_id":"79da5243-cbc0-4d46-955a-e9a4fcc5c9ea"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9b97ed36ebc82f17b7bae13445181045d6ebf3519311dfe3e25e434523caa16e","target":"record","created_at":"2026-05-18T02:44:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a0eb75b8a0c660ed2a4f81960274c6969fbfa096c82856ae7542a61892f5e7d0","cross_cats_sorted":["cs.AI","cs.CV","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","title_canon_sha256":"9274f354ba30343f0b53f3f8d3261e368967506e66d00baef791b534caabfefa"},"schema_version":"1.0","source":{"id":"2605.13277","kind":"arxiv","version":1}},"canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","first_computed_at":"2026-05-18T02:44:49.228866Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:49.228866Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9ivqBOo/Hszp01f7pKiKn3UcoEo1bGmRz1ia9RDJ48C/OK/LFLAM7BgHYTr80eqoC6Y7/BraZHjnNO7er9+gCg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:49.229283Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13277","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9b97ed36ebc82f17b7bae13445181045d6ebf3519311dfe3e25e434523caa16e","sha256:c5e44329d286823fecef0c1883639137a4b39cae43f44457f5d7d8a92ed19b84"],"state_sha256":"4e265bec40bc7fd11b34a60189fbae588ba0d062f3359ebd07c3d7da50718a25"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1zMpf4ky2DBC3+qYgAxCTzUYPukQ0J+CNyG79P1/BpIPl4967rHBXwd/+m4UiTZ8bedAnKxIL/JdEBbTk9sGCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T15:20:06.619778Z","bundle_sha256":"1936a106cc8e12607b4fe83fa629fb5f29c7e32edd7f1b7f7a6fb99999d994f6"}}