{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:RCMEQW5PULVUKL6M3M4T745CSP","short_pith_number":"pith:RCMEQW5P","schema_version":"1.0","canonical_sha256":"8898485bafa2eb452fccdb393ff3a293d37a281f4d60c33d591287d43317da8f","source":{"kind":"arxiv","id":"2607.00159","version":1},"attestation_state":"computed","paper":{"title":"Identifying and Resolving Pitfalls of Knowledge-Based VQA Benchmarks: Auditing, Repairing, and Augmenting","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.IR","cs.MM"],"primary_cat":"cs.CL","authors_text":"Charles V. Stewart, Qian Ma, Qiong Wu, S M Rayeed, Yao Ma","submitted_at":"2026-06-30T20:35:30Z","abstract_excerpt":"Knowledge-Based Visual Question Answering (KB-VQA) aims to evaluate whether Visual Language Models (VLMs) can retrieve, ground, and reason over external structured knowledge beyond visual evidence. In practice, answer accuracy is widely adopted as the primary evaluation metric, implicitly treating correctness as a proxy for knowledge-grounded reasoning. However, for existing KB-VQA benchmarks, this proxy relies on critical assumptions that are often overlooked and rendered unreliable by benchmark issues: annotated answer must be derivable from the associated knowledge base, question must be we"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2607.00159","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-30T20:35:30Z","cross_cats_sorted":["cs.CV","cs.IR","cs.MM"],"title_canon_sha256":"fdd4f87be42ee4c7ebb56aaccad5a6051eed4d587a373b2697edaaf8e01ad0b4","abstract_canon_sha256":"88a1e150fe2b76b3fd9eefe7cb87c4a58c0dc77eccb72a7fa9804e6ee9c57689"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T00:18:37.059411Z","signature_b64":"o+t5c0hwTgssGAWSL6eZLB5aTXq2BY4RtpigSjk7TCLV1M4xRS+ms59VlAN6fW31NzoLuilWXMkL6vt/26BnDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8898485bafa2eb452fccdb393ff3a293d37a281f4d60c33d591287d43317da8f","last_reissued_at":"2026-07-02T00:18:37.058764Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T00:18:37.058764Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Identifying and Resolving Pitfalls of Knowledge-Based VQA Benchmarks: Auditing, Repairing, and Augmenting","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.IR","cs.MM"],"primary_cat":"cs.CL","authors_text":"Charles V. Stewart, Qian Ma, Qiong Wu, S M Rayeed, Yao Ma","submitted_at":"2026-06-30T20:35:30Z","abstract_excerpt":"Knowledge-Based Visual Question Answering (KB-VQA) aims to evaluate whether Visual Language Models (VLMs) can retrieve, ground, and reason over external structured knowledge beyond visual evidence. In practice, answer accuracy is widely adopted as the primary evaluation metric, implicitly treating correctness as a proxy for knowledge-grounded reasoning. However, for existing KB-VQA benchmarks, this proxy relies on critical assumptions that are often overlooked and rendered unreliable by benchmark issues: annotated answer must be derivable from the associated knowledge base, question must be we"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.00159","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.00159/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2607.00159","created_at":"2026-07-02T00:18:37.058852+00:00"},{"alias_kind":"arxiv_version","alias_value":"2607.00159v1","created_at":"2026-07-02T00:18:37.058852+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.00159","created_at":"2026-07-02T00:18:37.058852+00:00"},{"alias_kind":"pith_short_12","alias_value":"RCMEQW5PULVU","created_at":"2026-07-02T00:18:37.058852+00:00"},{"alias_kind":"pith_short_16","alias_value":"RCMEQW5PULVUKL6M","created_at":"2026-07-02T00:18:37.058852+00:00"},{"alias_kind":"pith_short_8","alias_value":"RCMEQW5P","created_at":"2026-07-02T00:18:37.058852+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP","json":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP.json","graph_json":"https://pith.science/api/pith-number/RCMEQW5PULVUKL6M3M4T745CSP/graph.json","events_json":"https://pith.science/api/pith-number/RCMEQW5PULVUKL6M3M4T745CSP/events.json","paper":"https://pith.science/paper/RCMEQW5P"},"agent_actions":{"view_html":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP","download_json":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP.json","view_paper":"https://pith.science/paper/RCMEQW5P","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2607.00159&json=true","fetch_graph":"https://pith.science/api/pith-number/RCMEQW5PULVUKL6M3M4T745CSP/graph.json","fetch_events":"https://pith.science/api/pith-number/RCMEQW5PULVUKL6M3M4T745CSP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP/action/storage_attestation","attest_author":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP/action/author_attestation","sign_citation":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP/action/citation_signature","submit_replication":"https://pith.science/pith/RCMEQW5PULVUKL6M3M4T745CSP/action/replication_record"}},"created_at":"2026-07-02T00:18:37.058852+00:00","updated_at":"2026-07-02T00:18:37.058852+00:00"}