{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:7PUVV6LBBR5WSNTNH3QOMXX3RS","short_pith_number":"pith:7PUVV6LB","canonical_record":{"source":{"id":"2605.20676","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T03:44:06Z","cross_cats_sorted":[],"title_canon_sha256":"0645ba7df823f3a19a2c7b33e18ce8eded965c39d8bc66f82137f2adf73ab5f5","abstract_canon_sha256":"f44bddbfaa4f2cc3142bc0491fc83fdb7c9aa02acc11dc41763693a4db5a38ad"},"schema_version":"1.0"},"canonical_sha256":"fbe95af9610c7b69366d3ee0e65efb8c94e2833f4d5198927cf7c811a2164b60","source":{"kind":"arxiv","id":"2605.20676","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20676","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20676v1","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20676","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"pith_short_12","alias_value":"7PUVV6LBBR5W","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"pith_short_16","alias_value":"7PUVV6LBBR5WSNTN","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"pith_short_8","alias_value":"7PUVV6LB","created_at":"2026-05-21T01:04:48Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:7PUVV6LBBR5WSNTNH3QOMXX3RS","target":"record","payload":{"canonical_record":{"source":{"id":"2605.20676","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T03:44:06Z","cross_cats_sorted":[],"title_canon_sha256":"0645ba7df823f3a19a2c7b33e18ce8eded965c39d8bc66f82137f2adf73ab5f5","abstract_canon_sha256":"f44bddbfaa4f2cc3142bc0491fc83fdb7c9aa02acc11dc41763693a4db5a38ad"},"schema_version":"1.0"},"canonical_sha256":"fbe95af9610c7b69366d3ee0e65efb8c94e2833f4d5198927cf7c811a2164b60","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:48.746598Z","signature_b64":"g0TJZCJ/vnb7R9B1b3xOll4CQncXk2alGeG4qSYT97k/ehHBcjf535FizoKDMYibtS/BGfZJkCo+8YB00saHAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fbe95af9610c7b69366d3ee0e65efb8c94e2833f4d5198927cf7c811a2164b60","last_reissued_at":"2026-05-21T01:04:48.746101Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:48.746101Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.20676","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9Tn0NZZ+k8OuKsDRiiu1aApcIe3WTDQv1v2eVWTiuAQj5ZtFWyLyz3KjbO8szC4X1v/TrhmdIlGbL1NCIlrWAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T12:45:58.659074Z"},"content_sha256":"cbcbb406207c9dde0d41832d08cd0dc4e43320f7ecfbb824f97c036f5c0d2397","schema_version":"1.0","event_id":"sha256:cbcbb406207c9dde0d41832d08cd0dc4e43320f7ecfbb824f97c036f5c0d2397"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:7PUVV6LBBR5WSNTNH3QOMXX3RS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VISTAQA: Benchmarking Joint Visual Question Answering and Pixel-Level Evidence","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Krzysztof Czarnecki, Lihong Chen, Marco Pavone, Milan Ganai, Mozhgan Nasr Azadani, Sean Sedwards, Yimu Wang, Yongpeng Zhu","submitted_at":"2026-05-20T03:44:06Z","abstract_excerpt":"Establishing a clear link between model predictions and the visual evidence that supports them is critical for transparency and reliability in multimodal reasoning, yet current multimodal large language model (MLLM) evaluations do not explicitly enforce this alignment. Existing benchmarks assess either textual answer correctness or pixel-level localization in isolation, leaving the coupling of reasoning and grounding an open challenge. We introduce VISTAQA, a comprehensive benchmark for joint evaluation of free-form answer correctness and pixel-level evidence grounding in visual question answe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20676","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20676/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KRzNvmyVEWrmTMD/RTtT33CrVZHRaRLyKPy5gNSUIvqF1l/4qAUIAudSayfTInrx4CTI7VDklupM1tQxzTE/Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T12:45:58.659826Z"},"content_sha256":"5d5a247c6ad4973bd3850d2fd5fff2e4a12b6f9e64a4a1d205290f59ca383ea2","schema_version":"1.0","event_id":"sha256:5d5a247c6ad4973bd3850d2fd5fff2e4a12b6f9e64a4a1d205290f59ca383ea2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS/bundle.json","state_url":"https://pith.science/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T12:45:58Z","links":{"resolver":"https://pith.science/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS","bundle":"https://pith.science/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS/bundle.json","state":"https://pith.science/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7PUVV6LBBR5WSNTNH3QOMXX3RS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:7PUVV6LBBR5WSNTNH3QOMXX3RS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f44bddbfaa4f2cc3142bc0491fc83fdb7c9aa02acc11dc41763693a4db5a38ad","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T03:44:06Z","title_canon_sha256":"0645ba7df823f3a19a2c7b33e18ce8eded965c39d8bc66f82137f2adf73ab5f5"},"schema_version":"1.0","source":{"id":"2605.20676","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20676","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20676v1","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20676","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"pith_short_12","alias_value":"7PUVV6LBBR5W","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"pith_short_16","alias_value":"7PUVV6LBBR5WSNTN","created_at":"2026-05-21T01:04:48Z"},{"alias_kind":"pith_short_8","alias_value":"7PUVV6LB","created_at":"2026-05-21T01:04:48Z"}],"graph_snapshots":[{"event_id":"sha256:5d5a247c6ad4973bd3850d2fd5fff2e4a12b6f9e64a4a1d205290f59ca383ea2","target":"graph","created_at":"2026-05-21T01:04:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.20676/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Establishing a clear link between model predictions and the visual evidence that supports them is critical for transparency and reliability in multimodal reasoning, yet current multimodal large language model (MLLM) evaluations do not explicitly enforce this alignment. Existing benchmarks assess either textual answer correctness or pixel-level localization in isolation, leaving the coupling of reasoning and grounding an open challenge. We introduce VISTAQA, a comprehensive benchmark for joint evaluation of free-form answer correctness and pixel-level evidence grounding in visual question answe","authors_text":"Krzysztof Czarnecki, Lihong Chen, Marco Pavone, Milan Ganai, Mozhgan Nasr Azadani, Sean Sedwards, Yimu Wang, Yongpeng Zhu","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T03:44:06Z","title":"VISTAQA: Benchmarking Joint Visual Question Answering and Pixel-Level Evidence"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20676","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cbcbb406207c9dde0d41832d08cd0dc4e43320f7ecfbb824f97c036f5c0d2397","target":"record","created_at":"2026-05-21T01:04:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f44bddbfaa4f2cc3142bc0491fc83fdb7c9aa02acc11dc41763693a4db5a38ad","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T03:44:06Z","title_canon_sha256":"0645ba7df823f3a19a2c7b33e18ce8eded965c39d8bc66f82137f2adf73ab5f5"},"schema_version":"1.0","source":{"id":"2605.20676","kind":"arxiv","version":1}},"canonical_sha256":"fbe95af9610c7b69366d3ee0e65efb8c94e2833f4d5198927cf7c811a2164b60","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fbe95af9610c7b69366d3ee0e65efb8c94e2833f4d5198927cf7c811a2164b60","first_computed_at":"2026-05-21T01:04:48.746101Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:48.746101Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"g0TJZCJ/vnb7R9B1b3xOll4CQncXk2alGeG4qSYT97k/ehHBcjf535FizoKDMYibtS/BGfZJkCo+8YB00saHAw==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:48.746598Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.20676","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cbcbb406207c9dde0d41832d08cd0dc4e43320f7ecfbb824f97c036f5c0d2397","sha256:5d5a247c6ad4973bd3850d2fd5fff2e4a12b6f9e64a4a1d205290f59ca383ea2"],"state_sha256":"278379d92b9479cd1c95d4f481d066af19d0c2dbcdbe55f1f4f0bdcd1b0dfe9f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"s4hRZTTFK0yjFtMfgXd5Z9meVZ2gQRZRabBh9RlC//fXnpzLuWEzR8oZoVydLoSHZVFPTO/E/AxG57SBRtMZDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T12:45:58.663923Z","bundle_sha256":"7c2d3f8146ea838788e821e2ea12d01b70ad1a37c55c4ab2f0c6539154587e15"}}