{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:GQ3K7COZB6FCUBKXM57QUKKDGQ","short_pith_number":"pith:GQ3K7COZ","canonical_record":{"source":{"id":"2605.01733","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-03T06:09:04Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3ff95be860c28c40322b7bf26b65e24c59f0bd7facc4dec79c1051cd5999e4c9","abstract_canon_sha256":"a4000909f9852b0a7fd1b5d98d26b4e96c721160b63e30494fc79a09fe385781"},"schema_version":"1.0"},"canonical_sha256":"3436af89d90f8a2a0557677f0a2943343fd1ef2d654eac0a4622ecb755f33a4a","source":{"kind":"arxiv","id":"2605.01733","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.01733","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"arxiv_version","alias_value":"2605.01733v2","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.01733","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_12","alias_value":"GQ3K7COZB6FC","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_16","alias_value":"GQ3K7COZB6FCUBKX","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_8","alias_value":"GQ3K7COZ","created_at":"2026-05-20T01:05:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:GQ3K7COZB6FCUBKXM57QUKKDGQ","target":"record","payload":{"canonical_record":{"source":{"id":"2605.01733","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-03T06:09:04Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3ff95be860c28c40322b7bf26b65e24c59f0bd7facc4dec79c1051cd5999e4c9","abstract_canon_sha256":"a4000909f9852b0a7fd1b5d98d26b4e96c721160b63e30494fc79a09fe385781"},"schema_version":"1.0"},"canonical_sha256":"3436af89d90f8a2a0557677f0a2943343fd1ef2d654eac0a4622ecb755f33a4a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:15.138504Z","signature_b64":"/HKPqBJ11M0EXi4AKAKXwb8LhJnS4aTawdZWGqwETCZeDPcuhV2Fi0Fpk0bgaJhgr97RLKQeApaexYbudxyPBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3436af89d90f8a2a0557677f0a2943343fd1ef2d654eac0a4622ecb755f33a4a","last_reissued_at":"2026-05-20T01:05:15.137695Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:15.137695Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.01733","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"V7yTI7cNum207ksEuGf+gZ35wp30S0Xp0sf8cxbRtWqSzSHKdbUq9UB3zGV+IhvvF0+yFUAEugNaW4qNEnKkCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T00:29:31.368626Z"},"content_sha256":"b183e44d7d74b5733767b2e1b38e80141c659ef32924ffe889cb68ddd4f6ba37","schema_version":"1.0","event_id":"sha256:b183e44d7d74b5733767b2e1b38e80141c659ef32924ffe889cb68ddd4f6ba37"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:GQ3K7COZB6FCUBKXM57QUKKDGQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"GEASS: Gated Evidence-Adaptive Selective Caption Trust for Vision-Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"GEASS lets vision-language models decide per query how much of a self-generated caption to trust, cutting hallucinations.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Jiashen Ding, Shuoyang Zhang, Zeshang Li","submitted_at":"2026-05-03T06:09:04Z","abstract_excerpt":"Vision-Language Models (VLMs) excel at grounded reasoning but remain prone to object hallucination. Recent work treats self-generated captions as a uniformly positive resource, yet we find that naively embedding one can degrade rather than help--dropping Qwen2.5-VL-3B accuracy on HallusionBench by nearly 10 points. Two structural properties explain this. First, captions anchor not only the model's final answer but also its reasoning trajectory and lexical choices. Second, caption errors are asymmetric: omissions vastly outnumber fabrications, yet each fabrication carries a much larger per-inst"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on POPE and HallusionBench across four VLMs show that GEASS consistently improves over vanilla inference and contrastive decoding, with only two extra forward passes per query.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the combination of clean-path confidence and entropy reduction reliably identifies when and how much caption content is useful without discarding beneficial information or introducing new selection bias on a per-query basis.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"GEASS selectively gates and weights self-generated captions using confidence and entropy to reduce object hallucinations in VLMs, outperforming vanilla inference and contrastive decoding on POPE and HallusionBench.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"GEASS lets vision-language models decide per query how much of a self-generated caption to trust, cutting hallucinations.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"5814943da82c7c1de299bc3fc83449eaea971af8348362342ecc9475247726c1"},"source":{"id":"2605.01733","kind":"arxiv","version":2},"verdict":{"id":"b9fc24ee-b5bf-4066-8897-ed7526dd4c7b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T15:30:55.142595Z","strongest_claim":"Experiments on POPE and HallusionBench across four VLMs show that GEASS consistently improves over vanilla inference and contrastive decoding, with only two extra forward passes per query.","one_line_summary":"GEASS selectively gates and weights self-generated captions using confidence and entropy to reduce object hallucinations in VLMs, outperforming vanilla inference and contrastive decoding on POPE and HallusionBench.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the combination of clean-path confidence and entropy reduction reliably identifies when and how much caption content is useful without discarding beneficial information or introducing new selection bias on a per-query basis.","pith_extraction_headline":"GEASS lets vision-language models decide per query how much of a self-generated caption to trust, cutting hallucinations."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.01733/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-19T17:00:50.194655Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"cfcda64ef801a19bd91f01db784b8f39c6f8fb6fc1e720cfbc4c020bafe87a73"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"b9fc24ee-b5bf-4066-8897-ed7526dd4c7b"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5nTz8a/Ik+43cRhVD0fKV7XU9AOCSRkJ5vmnI6TvovnZpeW+uuoqzybq7gc/doOwUXmx2jRvD49T3k9nDGSEBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T00:29:31.369088Z"},"content_sha256":"77d3fcc69a87938d32835cc8c921af7bbeca4403ba83c5ebd5ac52a3dd88b8f9","schema_version":"1.0","event_id":"sha256:77d3fcc69a87938d32835cc8c921af7bbeca4403ba83c5ebd5ac52a3dd88b8f9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ/bundle.json","state_url":"https://pith.science/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T00:29:31Z","links":{"resolver":"https://pith.science/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ","bundle":"https://pith.science/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ/bundle.json","state":"https://pith.science/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GQ3K7COZB6FCUBKXM57QUKKDGQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GQ3K7COZB6FCUBKXM57QUKKDGQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a4000909f9852b0a7fd1b5d98d26b4e96c721160b63e30494fc79a09fe385781","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-03T06:09:04Z","title_canon_sha256":"3ff95be860c28c40322b7bf26b65e24c59f0bd7facc4dec79c1051cd5999e4c9"},"schema_version":"1.0","source":{"id":"2605.01733","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.01733","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"arxiv_version","alias_value":"2605.01733v2","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.01733","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_12","alias_value":"GQ3K7COZB6FC","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_16","alias_value":"GQ3K7COZB6FCUBKX","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_8","alias_value":"GQ3K7COZ","created_at":"2026-05-20T01:05:15Z"}],"graph_snapshots":[{"event_id":"sha256:77d3fcc69a87938d32835cc8c921af7bbeca4403ba83c5ebd5ac52a3dd88b8f9","target":"graph","created_at":"2026-05-20T01:05:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments on POPE and HallusionBench across four VLMs show that GEASS consistently improves over vanilla inference and contrastive decoding, with only two extra forward passes per query."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the combination of clean-path confidence and entropy reduction reliably identifies when and how much caption content is useful without discarding beneficial information or introducing new selection bias on a per-query basis."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"GEASS selectively gates and weights self-generated captions using confidence and entropy to reduce object hallucinations in VLMs, outperforming vanilla inference and contrastive decoding on POPE and HallusionBench."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"GEASS lets vision-language models decide per query how much of a self-generated caption to trust, cutting hallucinations."}],"snapshot_sha256":"5814943da82c7c1de299bc3fc83449eaea971af8348362342ecc9475247726c1"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T17:00:50.194655Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.01733/integrity.json","findings":[],"snapshot_sha256":"cfcda64ef801a19bd91f01db784b8f39c6f8fb6fc1e720cfbc4c020bafe87a73","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Vision-Language Models (VLMs) excel at grounded reasoning but remain prone to object hallucination. Recent work treats self-generated captions as a uniformly positive resource, yet we find that naively embedding one can degrade rather than help--dropping Qwen2.5-VL-3B accuracy on HallusionBench by nearly 10 points. Two structural properties explain this. First, captions anchor not only the model's final answer but also its reasoning trajectory and lexical choices. Second, caption errors are asymmetric: omissions vastly outnumber fabrications, yet each fabrication carries a much larger per-inst","authors_text":"Jiashen Ding, Shuoyang Zhang, Zeshang Li","cross_cats":["cs.AI"],"headline":"GEASS lets vision-language models decide per query how much of a self-generated caption to trust, cutting hallucinations.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-03T06:09:04Z","title":"GEASS: Gated Evidence-Adaptive Selective Caption Trust for Vision-Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.01733","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T15:30:55.142595Z","id":"b9fc24ee-b5bf-4066-8897-ed7526dd4c7b","model_set":{"reader":"grok-4.3"},"one_line_summary":"GEASS selectively gates and weights self-generated captions using confidence and entropy to reduce object hallucinations in VLMs, outperforming vanilla inference and contrastive decoding on POPE and HallusionBench.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"GEASS lets vision-language models decide per query how much of a self-generated caption to trust, cutting hallucinations.","strongest_claim":"Experiments on POPE and HallusionBench across four VLMs show that GEASS consistently improves over vanilla inference and contrastive decoding, with only two extra forward passes per query.","weakest_assumption":"That the combination of clean-path confidence and entropy reduction reliably identifies when and how much caption content is useful without discarding beneficial information or introducing new selection bias on a per-query basis."}},"verdict_id":"b9fc24ee-b5bf-4066-8897-ed7526dd4c7b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b183e44d7d74b5733767b2e1b38e80141c659ef32924ffe889cb68ddd4f6ba37","target":"record","created_at":"2026-05-20T01:05:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a4000909f9852b0a7fd1b5d98d26b4e96c721160b63e30494fc79a09fe385781","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-03T06:09:04Z","title_canon_sha256":"3ff95be860c28c40322b7bf26b65e24c59f0bd7facc4dec79c1051cd5999e4c9"},"schema_version":"1.0","source":{"id":"2605.01733","kind":"arxiv","version":2}},"canonical_sha256":"3436af89d90f8a2a0557677f0a2943343fd1ef2d654eac0a4622ecb755f33a4a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3436af89d90f8a2a0557677f0a2943343fd1ef2d654eac0a4622ecb755f33a4a","first_computed_at":"2026-05-20T01:05:15.137695Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:15.137695Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/HKPqBJ11M0EXi4AKAKXwb8LhJnS4aTawdZWGqwETCZeDPcuhV2Fi0Fpk0bgaJhgr97RLKQeApaexYbudxyPBg==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:15.138504Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.01733","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b183e44d7d74b5733767b2e1b38e80141c659ef32924ffe889cb68ddd4f6ba37","sha256:77d3fcc69a87938d32835cc8c921af7bbeca4403ba83c5ebd5ac52a3dd88b8f9"],"state_sha256":"74a95300072796c1c6f6e06cbd3eaf8c1b89369549e20afaeae4a7f27ffc98d0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vadzT5EEWhkDJkXGELynbLj0iB7NjK3lKdaRFeN3LuE8/LaHsH12OJ2f/gEDXDX2gFJMQDLxRlRPxm+KP9WkAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T00:29:31.371380Z","bundle_sha256":"e63dd00bd9deb20a5a5c80d09e5d3cf8ff7c169f3710b86ea65830c61ba37e2f"}}