{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:DD6HZH6AMPUXV2Z2Y4KMMCRQZO","short_pith_number":"pith:DD6HZH6A","canonical_record":{"source":{"id":"1904.08920","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-04-18T17:55:37Z","cross_cats_sorted":["cs.CV","cs.LG"],"title_canon_sha256":"44ce10db791852ad63110af69ac2c851fd7da4782cf6eb2362ce97663462f27d","abstract_canon_sha256":"059346af8afa3cf5e6b3cd1f1d5427e96a8fdaae1c43761409fdf58d44ff2db3"},"schema_version":"1.0"},"canonical_sha256":"18fc7c9fc063e97aeb3ac714c60a30cb80fcf5b6c5964547c3351a42ed200af5","source":{"kind":"arxiv","id":"1904.08920","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.08920","created_at":"2026-05-17T23:46:17Z"},{"alias_kind":"arxiv_version","alias_value":"1904.08920v2","created_at":"2026-05-17T23:46:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.08920","created_at":"2026-05-17T23:46:17Z"},{"alias_kind":"pith_short_12","alias_value":"DD6HZH6AMPUX","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"DD6HZH6AMPUXV2Z2","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"DD6HZH6A","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:DD6HZH6AMPUXV2Z2Y4KMMCRQZO","target":"record","payload":{"canonical_record":{"source":{"id":"1904.08920","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-04-18T17:55:37Z","cross_cats_sorted":["cs.CV","cs.LG"],"title_canon_sha256":"44ce10db791852ad63110af69ac2c851fd7da4782cf6eb2362ce97663462f27d","abstract_canon_sha256":"059346af8afa3cf5e6b3cd1f1d5427e96a8fdaae1c43761409fdf58d44ff2db3"},"schema_version":"1.0"},"canonical_sha256":"18fc7c9fc063e97aeb3ac714c60a30cb80fcf5b6c5964547c3351a42ed200af5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:17.703673Z","signature_b64":"WdvLrLUbo5hZwIxMsgPEn7j33GvpyVW/UNh+3JjYkyxx21aMzjuU6NnfAHycDMpS84srTB0fRF5sqw9EkH6HBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"18fc7c9fc063e97aeb3ac714c60a30cb80fcf5b6c5964547c3351a42ed200af5","last_reissued_at":"2026-05-17T23:46:17.703024Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:17.703024Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.08920","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:46:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xZ+J3WstYsG3WL/muzU9/nxDj7Ux6WRTDH3L68JXvUtL4eZL9dI5bvszvZA79a+mTHqdgaW86PUkMbamVmR0AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:16:54.106357Z"},"content_sha256":"40e43c74f719164e2fb04a063668933ec070afbae1c7f5c5a8f16533f331dcf6","schema_version":"1.0","event_id":"sha256:40e43c74f719164e2fb04a063668933ec070afbae1c7f5c5a8f16533f331dcf6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:DD6HZH6AMPUXV2Z2Y4KMMCRQZO","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards VQA Models That Can Read","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.LG"],"primary_cat":"cs.CL","authors_text":"Amanpreet Singh, Devi Parikh, Dhruv Batra, Marcus Rohrbach, Meet Shah, Vivek Natarajan, Xinlei Chen, Yu Jiang","submitted_at":"2019-04-18T17:55:37Z","abstract_excerpt":"Studies have shown that a dominant class of questions asked by visually impaired users on images of their surroundings involves reading text in the image. But today's VQA models can not read! Our paper takes a first step towards addressing this problem. First, we introduce a new \"TextVQA\" dataset to facilitate progress on this important problem. Existing datasets either have a small proportion of questions about text (e.g., the VQA dataset) or are too small (e.g., the VizWiz dataset). TextVQA contains 45,336 questions on 28,408 images that require reasoning about text to answer. Second, we int"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.08920","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:46:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JQ89+Doy1oXPcUrQghA+y2G+cda/PPiW6XVhdfIQ+4GAlDMLmSUwsPv7gZsUDRj3c0u/LpY/jZAvI9uT5fH4Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:16:54.107060Z"},"content_sha256":"756e43b922b75a3cc3892f6c188d2a4e3595c05174e76f25aba7fd1b55e8c707","schema_version":"1.0","event_id":"sha256:756e43b922b75a3cc3892f6c188d2a4e3595c05174e76f25aba7fd1b55e8c707"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO/bundle.json","state_url":"https://pith.science/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T23:16:54Z","links":{"resolver":"https://pith.science/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO","bundle":"https://pith.science/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO/bundle.json","state":"https://pith.science/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DD6HZH6AMPUXV2Z2Y4KMMCRQZO/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:DD6HZH6AMPUXV2Z2Y4KMMCRQZO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"059346af8afa3cf5e6b3cd1f1d5427e96a8fdaae1c43761409fdf58d44ff2db3","cross_cats_sorted":["cs.CV","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-04-18T17:55:37Z","title_canon_sha256":"44ce10db791852ad63110af69ac2c851fd7da4782cf6eb2362ce97663462f27d"},"schema_version":"1.0","source":{"id":"1904.08920","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.08920","created_at":"2026-05-17T23:46:17Z"},{"alias_kind":"arxiv_version","alias_value":"1904.08920v2","created_at":"2026-05-17T23:46:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.08920","created_at":"2026-05-17T23:46:17Z"},{"alias_kind":"pith_short_12","alias_value":"DD6HZH6AMPUX","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"DD6HZH6AMPUXV2Z2","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"DD6HZH6A","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:756e43b922b75a3cc3892f6c188d2a4e3595c05174e76f25aba7fd1b55e8c707","target":"graph","created_at":"2026-05-17T23:46:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Studies have shown that a dominant class of questions asked by visually impaired users on images of their surroundings involves reading text in the image. But today's VQA models can not read! Our paper takes a first step towards addressing this problem. First, we introduce a new \"TextVQA\" dataset to facilitate progress on this important problem. Existing datasets either have a small proportion of questions about text (e.g., the VQA dataset) or are too small (e.g., the VizWiz dataset). TextVQA contains 45,336 questions on 28,408 images that require reasoning about text to answer. Second, we int","authors_text":"Amanpreet Singh, Devi Parikh, Dhruv Batra, Marcus Rohrbach, Meet Shah, Vivek Natarajan, Xinlei Chen, Yu Jiang","cross_cats":["cs.CV","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-04-18T17:55:37Z","title":"Towards VQA Models That Can Read"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.08920","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:40e43c74f719164e2fb04a063668933ec070afbae1c7f5c5a8f16533f331dcf6","target":"record","created_at":"2026-05-17T23:46:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"059346af8afa3cf5e6b3cd1f1d5427e96a8fdaae1c43761409fdf58d44ff2db3","cross_cats_sorted":["cs.CV","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-04-18T17:55:37Z","title_canon_sha256":"44ce10db791852ad63110af69ac2c851fd7da4782cf6eb2362ce97663462f27d"},"schema_version":"1.0","source":{"id":"1904.08920","kind":"arxiv","version":2}},"canonical_sha256":"18fc7c9fc063e97aeb3ac714c60a30cb80fcf5b6c5964547c3351a42ed200af5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"18fc7c9fc063e97aeb3ac714c60a30cb80fcf5b6c5964547c3351a42ed200af5","first_computed_at":"2026-05-17T23:46:17.703024Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:46:17.703024Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WdvLrLUbo5hZwIxMsgPEn7j33GvpyVW/UNh+3JjYkyxx21aMzjuU6NnfAHycDMpS84srTB0fRF5sqw9EkH6HBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:46:17.703673Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.08920","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:40e43c74f719164e2fb04a063668933ec070afbae1c7f5c5a8f16533f331dcf6","sha256:756e43b922b75a3cc3892f6c188d2a4e3595c05174e76f25aba7fd1b55e8c707"],"state_sha256":"29f3bafa4732ba70ddc705bb9b1eca28270267ee5cf8641a5a1ba207921143da"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NSLCzeSJqA/t7E3Y6bNpDD9AQ/YfiH9KJUZtJVBWowfPtHnMNWVcNFfGyrRVx2B4ReR2o110zfE2hk7HH4x4AA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T23:16:54.110612Z","bundle_sha256":"14e633f81cdfa2ae709a4046e4b66fb59c4a7dc9943f181150ef8091c38a6938"}}