{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QIJS7POPXR6RYWPL2KUE7Y3VYC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a0eb75b8a0c660ed2a4f81960274c6969fbfa096c82856ae7542a61892f5e7d0","cross_cats_sorted":["cs.AI","cs.CV","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","title_canon_sha256":"9274f354ba30343f0b53f3f8d3261e368967506e66d00baef791b534caabfefa"},"schema_version":"1.0","source":{"id":"2605.13277","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13277","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13277v1","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13277","created_at":"2026-05-18T02:44:49Z"},{"alias_kind":"pith_short_12","alias_value":"QIJS7POPXR6R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QIJS7POPXR6RYWPL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QIJS7POP","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:c5e44329d286823fecef0c1883639137a4b39cae43f44457f5d7d8a92ed19b84","target":"graph","created_at":"2026-05-18T02:44:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ranking evidence by information gain on this latent variable is equivalent to answer-space utility"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"under mild assumptions"},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Evidence utility is defined as information gain on the model's output distribution, with ranking by gain on a latent helpfulness variable shown equivalent to answer-space utility under mild assumptions, enabling a training-free surrogate framework that outperforms baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG"}],"snapshot_sha256":"43252e453d2c92aee16fccff049ee089523766acbfc4bfe88e0dd91c033d5d93"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Visual evidence selection is a critical component of multimodal retrieval-augmented generation (RAG), yet existing methods typically rely on semantic relevance or surface-level similarity, which are often misaligned with the actual utility of visual evidence for downstream reasoning. We reformulate multimodal evidence selection from an information-theoretic perspective by defining evidence utility as the information gain induced on a model's output distribution. To overcome the intractability of answer-space optimization, we introduce a latent notion of evidence helpfulness and theoretically s","authors_text":"Haofeng Zhang, Weiqing Luo, Xiao Wang, Zhiyuan Yu, Ziyi Huang, Zongye Hu","cross_cats":["cs.AI","cs.CV","cs.IR","cs.LG"],"headline":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","title":"Utility-Oriented Visual Evidence Selection for Multimodal Retrieval-Augmented Generation"},"references":{"count":43,"internal_anchors":11,"resolved_work":43,"sample":[{"cited_arxiv_id":"2410.05160","doi":"","is_internal_anchor":true,"ref_index":1,"title":"VLM2Vec: Training Vision-Language Models for Massive Multimodal Embedding Tasks","work_id":"ec376227-cc62-4775-9e03-bed3862151d0","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Vlm2vec-v2: Advancing multimodal em- bedding for videos, images, and visual documents","work_id":"645b5239-8642-4bd5-a661-7ff075ceb306","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) , pages=","work_id":"f31acd76-d74b-4edb-aed0-20b9256ab082","year":null},{"cited_arxiv_id":"2407.12580","doi":"","is_internal_anchor":true,"ref_index":4,"title":"E5-V: Universal Embeddings with Multimodal Large Language Models","work_id":"df2e178e-bd96-48f4-8431-51c61fbc63fd","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"GME: Improving Universal Multimodal Retrieval by Multimodal","work_id":"a99a6453-de04-4fcb-8376-b10bf7dd68c7","year":null}],"snapshot_sha256":"69ca431a7ca9f161e76ff13a3e48b127b21d43f61226233ae9f239837e85eda3"},"source":{"id":"2605.13277","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T19:06:00.400435Z","id":"79da5243-cbc0-4d46-955a-e9a4fcc5c9ea","model_set":{"reader":"grok-4.3"},"one_line_summary":"Evidence utility is defined as information gain on the model's output distribution, with ranking by gain on a latent helpfulness variable shown equivalent to answer-space utility under mild assumptions, enabling a training-free surrogate framework that outperforms baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Ranking visual evidence by information gain on a latent helpfulness variable matches its answer-space utility in multimodal RAG","strongest_claim":"ranking evidence by information gain on this latent variable is equivalent to answer-space utility","weakest_assumption":"under mild assumptions"}},"verdict_id":"79da5243-cbc0-4d46-955a-e9a4fcc5c9ea"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9b97ed36ebc82f17b7bae13445181045d6ebf3519311dfe3e25e434523caa16e","target":"record","created_at":"2026-05-18T02:44:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a0eb75b8a0c660ed2a4f81960274c6969fbfa096c82856ae7542a61892f5e7d0","cross_cats_sorted":["cs.AI","cs.CV","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T09:54:31Z","title_canon_sha256":"9274f354ba30343f0b53f3f8d3261e368967506e66d00baef791b534caabfefa"},"schema_version":"1.0","source":{"id":"2605.13277","kind":"arxiv","version":1}},"canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82132fbdcfbc7d1c59ebd2a84fe375c09ce915c88ee618fca9b944316fd2c959","first_computed_at":"2026-05-18T02:44:49.228866Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:49.228866Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9ivqBOo/Hszp01f7pKiKn3UcoEo1bGmRz1ia9RDJ48C/OK/LFLAM7BgHYTr80eqoC6Y7/BraZHjnNO7er9+gCg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:49.229283Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13277","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9b97ed36ebc82f17b7bae13445181045d6ebf3519311dfe3e25e434523caa16e","sha256:c5e44329d286823fecef0c1883639137a4b39cae43f44457f5d7d8a92ed19b84"],"state_sha256":"4e265bec40bc7fd11b34a60189fbae588ba0d062f3359ebd07c3d7da50718a25"}