{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:P5ZOUKNGIAP6REN4KPXRJPX3P4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"592f2572ce4b7b20bb19fa4d324532bb359242059486b0b9625c6f586ac64c87","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-05-04T18:54:09Z","title_canon_sha256":"fd859504613d6f62c17e37ebe9e8e26a2334ddd0240748465f6a9146ff8cb63c"},"schema_version":"1.0","source":{"id":"1605.01379","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.01379","created_at":"2026-05-18T01:06:28Z"},{"alias_kind":"arxiv_version","alias_value":"1605.01379v2","created_at":"2026-05-18T01:06:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.01379","created_at":"2026-05-18T01:06:28Z"},{"alias_kind":"pith_short_12","alias_value":"P5ZOUKNGIAP6","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_16","alias_value":"P5ZOUKNGIAP6REN4","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_8","alias_value":"P5ZOUKNG","created_at":"2026-05-18T12:30:36Z"}],"graph_snapshots":[{"event_id":"sha256:bf896997887ea7177a7d0838d71cfcf2f97ef32868f1091a5cb49e14593c2f4d","target":"graph","created_at":"2026-05-18T01:06:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Visual Question Answering (VQA) is the task of taking as input an image and a free-form natural language question about the image, and producing an accurate answer. In this work we view VQA as a \"feature extraction\" module to extract image and caption representations. We employ these representations for the task of image-caption ranking. Each feature dimension captures (imagines) whether a fact (question-answer pair) could plausibly be true for the image and caption. This allows the model to interpret images and captions from a wide variety of perspectives. We propose score-level and represent","authors_text":"Devi Parikh, Xiao Lin","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-05-04T18:54:09Z","title":"Leveraging Visual Question Answering for Image-Caption Ranking"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.01379","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:290ccd67121e9791acfb0e9358091c23216cbfbe04c31235919b966b44825ed1","target":"record","created_at":"2026-05-18T01:06:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"592f2572ce4b7b20bb19fa4d324532bb359242059486b0b9625c6f586ac64c87","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-05-04T18:54:09Z","title_canon_sha256":"fd859504613d6f62c17e37ebe9e8e26a2334ddd0240748465f6a9146ff8cb63c"},"schema_version":"1.0","source":{"id":"1605.01379","kind":"arxiv","version":2}},"canonical_sha256":"7f72ea29a6401fe891bc53ef14befb7f1c772ca4674cc502d5f8c0a6b3bc9732","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7f72ea29a6401fe891bc53ef14befb7f1c772ca4674cc502d5f8c0a6b3bc9732","first_computed_at":"2026-05-18T01:06:28.645201Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:06:28.645201Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pnsebTxmI3ij3ZtmhuAlM63AhWRkFaDwma0FJiKOzvnjW1Nf+wsOlNsigvUxLeeaujkjUp8tMFwhQAvhrae2CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:06:28.645922Z","signed_message":"canonical_sha256_bytes"},"source_id":"1605.01379","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:290ccd67121e9791acfb0e9358091c23216cbfbe04c31235919b966b44825ed1","sha256:bf896997887ea7177a7d0838d71cfcf2f97ef32868f1091a5cb49e14593c2f4d"],"state_sha256":"e2f2130d36b26443aa505a25d673aa67e50942de57f5914e347aee35c161a38e"}