{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:WN5NGGKGWZCNYQJCME6FZ32YDZ","short_pith_number":"pith:WN5NGGKG","canonical_record":{"source":{"id":"2305.10415","kind":"arxiv","version":6},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-05-17T17:50:16Z","cross_cats_sorted":[],"title_canon_sha256":"d54473c845024f3af2c48d2eec48eff4f4ba48a71417a05da866311e26db8ccc","abstract_canon_sha256":"814d4467633f689e887d433655429ab04ad7e5dfc245da5ae3c8e4e1774645c4"},"schema_version":"1.0"},"canonical_sha256":"b37ad31946b644dc4122613c5cef581e62ae96a3acbbf656ca39fc598d7a9411","source":{"kind":"arxiv","id":"2305.10415","version":6},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2305.10415","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"arxiv_version","alias_value":"2305.10415v6","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2305.10415","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"pith_short_12","alias_value":"WN5NGGKGWZCN","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"WN5NGGKGWZCNYQJC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"WN5NGGKG","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:WN5NGGKGWZCNYQJCME6FZ32YDZ","target":"record","payload":{"canonical_record":{"source":{"id":"2305.10415","kind":"arxiv","version":6},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-05-17T17:50:16Z","cross_cats_sorted":[],"title_canon_sha256":"d54473c845024f3af2c48d2eec48eff4f4ba48a71417a05da866311e26db8ccc","abstract_canon_sha256":"814d4467633f689e887d433655429ab04ad7e5dfc245da5ae3c8e4e1774645c4"},"schema_version":"1.0"},"canonical_sha256":"b37ad31946b644dc4122613c5cef581e62ae96a3acbbf656ca39fc598d7a9411","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:49.837041Z","signature_b64":"fC4oQnDROrN/3WqgOnXZmtRbiq8Np8aknZqVctUeQ3ZsAVGJcLrIUaQdmSdqVhrv+I45jKrcorF5eBrG+S7/BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b37ad31946b644dc4122613c5cef581e62ae96a3acbbf656ca39fc598d7a9411","last_reissued_at":"2026-05-17T23:38:49.836419Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:49.836419Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2305.10415","source_version":6,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Rcp/BknUzAZQmdbVa2hWW9oendglyoUuI/aKDSOhlt8n0WSE+d3/AFNSKtlqUFnmTFG/UjoiAtcFrv/v7o0jAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T02:14:19.115702Z"},"content_sha256":"5f7e0f2e86ac17aedec0e095a16212b88e9bb830c34e7560e64eb7e2896aff98","schema_version":"1.0","event_id":"sha256:5f7e0f2e86ac17aedec0e095a16212b88e9bb830c34e7560e64eb7e2896aff98"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:WN5NGGKGWZCNYQJCME6FZ32YDZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"PMC-VQA: Visual Instruction Tuning for Medical Visual Question Answering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A generative model trained on a 227k-pair medical VQA dataset from literature outperforms prior systems on clinical benchmarks after fine-tuning.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chaoyi Wu, Weidi Xie, Weixiong Lin, Xiaoman Zhang, Yanfeng Wang, Ya Zhang, Ziheng Zhao","submitted_at":"2023-05-17T17:50:16Z","abstract_excerpt":"Medical Visual Question Answering (MedVQA) presents a significant opportunity to enhance diagnostic accuracy and healthcare delivery by leveraging artificial intelligence to interpret and answer questions based on medical images. In this study, we reframe the problem of MedVQA as a generation task that naturally follows the human-machine interaction and propose a generative-based model for medical visual understanding by aligning visual information from a pre-trained vision encoder with a large language model. We establish a scalable pipeline to construct a large-scale medical visual question-"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We train the proposed model on PMC-VQA and then fine-tune it on multiple public benchmarks, e.g., VQA-RAD, SLAKE, and Image-Clef-2019, significantly outperforming existing MedVQA models in generating relevant, accurate free-form answers.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The PMC-VQA dataset constructed from literature sources provides representative coverage of real clinical images and questions without systematic biases from publication practices or selection effects.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"PMC-VQA dataset and MedVInT model achieve better generative performance on medical VQA benchmarks by visual instruction tuning on a newly constructed large-scale dataset.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A generative model trained on a 227k-pair medical VQA dataset from literature outperforms prior systems on clinical benchmarks after fine-tuning.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"68cdf44249ac1c398bdd514dc9fe0028613f23bb9ec9df43bb6d46cd12503672"},"source":{"id":"2305.10415","kind":"arxiv","version":6},"verdict":{"id":"1281377d-2e49-4405-ba60-536e1872865e","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T23:04:29.314082Z","strongest_claim":"We train the proposed model on PMC-VQA and then fine-tune it on multiple public benchmarks, e.g., VQA-RAD, SLAKE, and Image-Clef-2019, significantly outperforming existing MedVQA models in generating relevant, accurate free-form answers.","one_line_summary":"PMC-VQA dataset and MedVInT model achieve better generative performance on medical VQA benchmarks by visual instruction tuning on a newly constructed large-scale dataset.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The PMC-VQA dataset constructed from literature sources provides representative coverage of real clinical images and questions without systematic biases from publication practices or selection effects.","pith_extraction_headline":"A generative model trained on a 227k-pair medical VQA dataset from literature outperforms prior systems on clinical benchmarks after fine-tuning."},"references":{"count":64,"sample":[{"doi":"","year":2022,"title":"Flamingo: a visual language model for few-shot learning","work_id":"40995f15-58e1-4bdb-8885-4ad729de9a28","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Flamingo: a visual language model for few-shot learning","work_id":"597da3a9-ab88-4b28-af45-6a03d98fe19d","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"The medical segmentation decathlon.Nature Communications, 13(1):4128, 2022","work_id":"e1ca1c39-044b-48fe-adb2-e00047529c1f","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Anas Awadalla, Irena Gao, Joshua Gardner, Jack Hessel, Yusuf Hanafy, Wanrong Zhu, Kalyani Marathe, Yonatan Bitton, Samir Gadre, Jenia Jitsev, et al. Openflamingo, 2023","work_id":"8f318ea8-ad3c-4346-90e6-758be1279df6","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Artificial intelligence in healthcare: transforming the practice of medicine.Future healthcare journal, 8(2):e188–e194, 2021","work_id":"6d717124-ffbe-4a1b-a257-0d7c77516cf8","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":64,"snapshot_sha256":"c5af3927d5cd4d741314f5e228d044ef2666dee4c01cae43cbb08c6ee9d88fb8","internal_anchors":9},"formal_canon":{"evidence_count":2,"snapshot_sha256":"9f3a2619215f7d780091b241301df0b6e5756c2f4b1fa911559822a7af013136"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"1281377d-2e49-4405-ba60-536e1872865e"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xRLQJ+jCZFDSJA0HwgIdGslOkJz1qpDFgLzbmxhwJMUdg2EGNO6QYCnoWbTh7Xdxc71YqIriQvdPkMSF1WHQBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T02:14:19.116584Z"},"content_sha256":"af38826f7285d1d25ded58485ef87c297c90369cd46c6d6266027e2008597502","schema_version":"1.0","event_id":"sha256:af38826f7285d1d25ded58485ef87c297c90369cd46c6d6266027e2008597502"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ/bundle.json","state_url":"https://pith.science/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T02:14:19Z","links":{"resolver":"https://pith.science/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ","bundle":"https://pith.science/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ/bundle.json","state":"https://pith.science/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WN5NGGKGWZCNYQJCME6FZ32YDZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:WN5NGGKGWZCNYQJCME6FZ32YDZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"814d4467633f689e887d433655429ab04ad7e5dfc245da5ae3c8e4e1774645c4","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-05-17T17:50:16Z","title_canon_sha256":"d54473c845024f3af2c48d2eec48eff4f4ba48a71417a05da866311e26db8ccc"},"schema_version":"1.0","source":{"id":"2305.10415","kind":"arxiv","version":6}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2305.10415","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"arxiv_version","alias_value":"2305.10415v6","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2305.10415","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"pith_short_12","alias_value":"WN5NGGKGWZCN","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"WN5NGGKGWZCNYQJC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"WN5NGGKG","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:af38826f7285d1d25ded58485ef87c297c90369cd46c6d6266027e2008597502","target":"graph","created_at":"2026-05-17T23:38:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We train the proposed model on PMC-VQA and then fine-tune it on multiple public benchmarks, e.g., VQA-RAD, SLAKE, and Image-Clef-2019, significantly outperforming existing MedVQA models in generating relevant, accurate free-form answers."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The PMC-VQA dataset constructed from literature sources provides representative coverage of real clinical images and questions without systematic biases from publication practices or selection effects."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"PMC-VQA dataset and MedVInT model achieve better generative performance on medical VQA benchmarks by visual instruction tuning on a newly constructed large-scale dataset."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A generative model trained on a 227k-pair medical VQA dataset from literature outperforms prior systems on clinical benchmarks after fine-tuning."}],"snapshot_sha256":"68cdf44249ac1c398bdd514dc9fe0028613f23bb9ec9df43bb6d46cd12503672"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"9f3a2619215f7d780091b241301df0b6e5756c2f4b1fa911559822a7af013136"},"paper":{"abstract_excerpt":"Medical Visual Question Answering (MedVQA) presents a significant opportunity to enhance diagnostic accuracy and healthcare delivery by leveraging artificial intelligence to interpret and answer questions based on medical images. In this study, we reframe the problem of MedVQA as a generation task that naturally follows the human-machine interaction and propose a generative-based model for medical visual understanding by aligning visual information from a pre-trained vision encoder with a large language model. We establish a scalable pipeline to construct a large-scale medical visual question-","authors_text":"Chaoyi Wu, Weidi Xie, Weixiong Lin, Xiaoman Zhang, Yanfeng Wang, Ya Zhang, Ziheng Zhao","cross_cats":[],"headline":"A generative model trained on a 227k-pair medical VQA dataset from literature outperforms prior systems on clinical benchmarks after fine-tuning.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-05-17T17:50:16Z","title":"PMC-VQA: Visual Instruction Tuning for Medical Visual Question Answering"},"references":{"count":64,"internal_anchors":9,"resolved_work":64,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Flamingo: a visual language model for few-shot learning","work_id":"40995f15-58e1-4bdb-8885-4ad729de9a28","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Flamingo: a visual language model for few-shot learning","work_id":"597da3a9-ab88-4b28-af45-6a03d98fe19d","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"The medical segmentation decathlon.Nature Communications, 13(1):4128, 2022","work_id":"e1ca1c39-044b-48fe-adb2-e00047529c1f","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Anas Awadalla, Irena Gao, Joshua Gardner, Jack Hessel, Yusuf Hanafy, Wanrong Zhu, Kalyani Marathe, Yonatan Bitton, Samir Gadre, Jenia Jitsev, et al. Openflamingo, 2023","work_id":"8f318ea8-ad3c-4346-90e6-758be1279df6","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Artificial intelligence in healthcare: transforming the practice of medicine.Future healthcare journal, 8(2):e188–e194, 2021","work_id":"6d717124-ffbe-4a1b-a257-0d7c77516cf8","year":2021}],"snapshot_sha256":"c5af3927d5cd4d741314f5e228d044ef2666dee4c01cae43cbb08c6ee9d88fb8"},"source":{"id":"2305.10415","kind":"arxiv","version":6},"verdict":{"created_at":"2026-05-15T23:04:29.314082Z","id":"1281377d-2e49-4405-ba60-536e1872865e","model_set":{"reader":"grok-4.3"},"one_line_summary":"PMC-VQA dataset and MedVInT model achieve better generative performance on medical VQA benchmarks by visual instruction tuning on a newly constructed large-scale dataset.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A generative model trained on a 227k-pair medical VQA dataset from literature outperforms prior systems on clinical benchmarks after fine-tuning.","strongest_claim":"We train the proposed model on PMC-VQA and then fine-tune it on multiple public benchmarks, e.g., VQA-RAD, SLAKE, and Image-Clef-2019, significantly outperforming existing MedVQA models in generating relevant, accurate free-form answers.","weakest_assumption":"The PMC-VQA dataset constructed from literature sources provides representative coverage of real clinical images and questions without systematic biases from publication practices or selection effects."}},"verdict_id":"1281377d-2e49-4405-ba60-536e1872865e"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5f7e0f2e86ac17aedec0e095a16212b88e9bb830c34e7560e64eb7e2896aff98","target":"record","created_at":"2026-05-17T23:38:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"814d4467633f689e887d433655429ab04ad7e5dfc245da5ae3c8e4e1774645c4","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-05-17T17:50:16Z","title_canon_sha256":"d54473c845024f3af2c48d2eec48eff4f4ba48a71417a05da866311e26db8ccc"},"schema_version":"1.0","source":{"id":"2305.10415","kind":"arxiv","version":6}},"canonical_sha256":"b37ad31946b644dc4122613c5cef581e62ae96a3acbbf656ca39fc598d7a9411","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b37ad31946b644dc4122613c5cef581e62ae96a3acbbf656ca39fc598d7a9411","first_computed_at":"2026-05-17T23:38:49.836419Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:49.836419Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fC4oQnDROrN/3WqgOnXZmtRbiq8Np8aknZqVctUeQ3ZsAVGJcLrIUaQdmSdqVhrv+I45jKrcorF5eBrG+S7/BQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:49.837041Z","signed_message":"canonical_sha256_bytes"},"source_id":"2305.10415","source_kind":"arxiv","source_version":6}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5f7e0f2e86ac17aedec0e095a16212b88e9bb830c34e7560e64eb7e2896aff98","sha256:af38826f7285d1d25ded58485ef87c297c90369cd46c6d6266027e2008597502"],"state_sha256":"266794f78a86b4205005133d05178671463f99056f542386eed5719ff2d22e5e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AsxebBV4IW66vClEsQGvYTzxK7ghz+Al2tyjsZK67wn6De6JUyw6H0cp7dXod1e8H68Oy4jk2O6PouSrSGdgBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T02:14:19.122379Z","bundle_sha256":"22ca040f19ee2ed4468251d8d05a91ddd0d80f699b889605ca03488f0930ee35"}}