{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:NB344PD6PB6MIVD34AGNQ27HTG","short_pith_number":"pith:NB344PD6","canonical_record":{"source":{"id":"1804.02088","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-04-06T00:28:57Z","cross_cats_sorted":[],"title_canon_sha256":"a0e8e8411dddb8010952a4f3cc7e91c6d8f3e2d00d35df5ef208396f19a11d64","abstract_canon_sha256":"cc9a16bebb9d09028936b75ff5fcaffa091fcb3fd332dd76f55cff192047d199"},"schema_version":"1.0"},"canonical_sha256":"6877ce3c7e787cc4547be00cd86be7998d653c2a9d86e86910052d154f973b99","source":{"kind":"arxiv","id":"1804.02088","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.02088","created_at":"2026-05-18T00:10:21Z"},{"alias_kind":"arxiv_version","alias_value":"1804.02088v2","created_at":"2026-05-18T00:10:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.02088","created_at":"2026-05-18T00:10:21Z"},{"alias_kind":"pith_short_12","alias_value":"NB344PD6PB6M","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NB344PD6PB6MIVD3","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NB344PD6","created_at":"2026-05-18T12:32:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:NB344PD6PB6MIVD34AGNQ27HTG","target":"record","payload":{"canonical_record":{"source":{"id":"1804.02088","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-04-06T00:28:57Z","cross_cats_sorted":[],"title_canon_sha256":"a0e8e8411dddb8010952a4f3cc7e91c6d8f3e2d00d35df5ef208396f19a11d64","abstract_canon_sha256":"cc9a16bebb9d09028936b75ff5fcaffa091fcb3fd332dd76f55cff192047d199"},"schema_version":"1.0"},"canonical_sha256":"6877ce3c7e787cc4547be00cd86be7998d653c2a9d86e86910052d154f973b99","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:10:21.893914Z","signature_b64":"d9gOwCaytHI8zg50FSrzYjKHw/ojbMXGKtHA+AblaAZjD2dt4Owbmms36+pIMu8YJNIKn3VsFB+enQx2pyxhBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6877ce3c7e787cc4547be00cd86be7998d653c2a9d86e86910052d154f973b99","last_reissued_at":"2026-05-18T00:10:21.893274Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:10:21.893274Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.02088","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:10:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+G3ey/GQBWzXlT0j9rFAvcw2billyPX8OoQiILbGjc0iDhE0i2yCZQRSCHsIU3g//7WFaOTemIm0r/pYuRz0Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T14:36:39.536113Z"},"content_sha256":"5a71b84cbcb887d64a019f53cce28cfb167616cfb3c799ba3a687654ea0b68a8","schema_version":"1.0","event_id":"sha256:5a71b84cbcb887d64a019f53cce28cfb167616cfb3c799ba3a687654ea0b68a8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:NB344PD6PB6MIVD34AGNQ27HTG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Question Type Guided Attention in Visual Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Animashree Anandkumar, Sheng Zha, Tommaso Furlanello, Yang Shi","submitted_at":"2018-04-06T00:28:57Z","abstract_excerpt":"Visual Question Answering (VQA) requires integration of feature maps with drastically different structures and focus of the correct regions. Image descriptors have structures at multiple spatial scales, while lexical inputs inherently follow a temporal sequence and naturally cluster into semantically different question types. A lot of previous works use complex models to extract feature representations but neglect to use high-level information summary such as question types in learning. In this work, we propose Question Type-guided Attention (QTA). It utilizes the information of question type "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.02088","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:10:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3Zjk7fIY7sVJe/l2CdC8wxNwsCbNlbQgUyYOWAP3vLTA7Gu4HXGCB+XqokxT99cKg6s6pMyj0CqjfIdGF1aPDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T14:36:39.536775Z"},"content_sha256":"6c854889682bd1a8a052b7c34c9fb0c85799c434017d033641205565c63ad46e","schema_version":"1.0","event_id":"sha256:6c854889682bd1a8a052b7c34c9fb0c85799c434017d033641205565c63ad46e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NB344PD6PB6MIVD34AGNQ27HTG/bundle.json","state_url":"https://pith.science/pith/NB344PD6PB6MIVD34AGNQ27HTG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NB344PD6PB6MIVD34AGNQ27HTG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T14:36:39Z","links":{"resolver":"https://pith.science/pith/NB344PD6PB6MIVD34AGNQ27HTG","bundle":"https://pith.science/pith/NB344PD6PB6MIVD34AGNQ27HTG/bundle.json","state":"https://pith.science/pith/NB344PD6PB6MIVD34AGNQ27HTG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NB344PD6PB6MIVD34AGNQ27HTG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:NB344PD6PB6MIVD34AGNQ27HTG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cc9a16bebb9d09028936b75ff5fcaffa091fcb3fd332dd76f55cff192047d199","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-04-06T00:28:57Z","title_canon_sha256":"a0e8e8411dddb8010952a4f3cc7e91c6d8f3e2d00d35df5ef208396f19a11d64"},"schema_version":"1.0","source":{"id":"1804.02088","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.02088","created_at":"2026-05-18T00:10:21Z"},{"alias_kind":"arxiv_version","alias_value":"1804.02088v2","created_at":"2026-05-18T00:10:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.02088","created_at":"2026-05-18T00:10:21Z"},{"alias_kind":"pith_short_12","alias_value":"NB344PD6PB6M","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NB344PD6PB6MIVD3","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NB344PD6","created_at":"2026-05-18T12:32:40Z"}],"graph_snapshots":[{"event_id":"sha256:6c854889682bd1a8a052b7c34c9fb0c85799c434017d033641205565c63ad46e","target":"graph","created_at":"2026-05-18T00:10:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Visual Question Answering (VQA) requires integration of feature maps with drastically different structures and focus of the correct regions. Image descriptors have structures at multiple spatial scales, while lexical inputs inherently follow a temporal sequence and naturally cluster into semantically different question types. A lot of previous works use complex models to extract feature representations but neglect to use high-level information summary such as question types in learning. In this work, we propose Question Type-guided Attention (QTA). It utilizes the information of question type ","authors_text":"Animashree Anandkumar, Sheng Zha, Tommaso Furlanello, Yang Shi","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-04-06T00:28:57Z","title":"Question Type Guided Attention in Visual Question Answering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.02088","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5a71b84cbcb887d64a019f53cce28cfb167616cfb3c799ba3a687654ea0b68a8","target":"record","created_at":"2026-05-18T00:10:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cc9a16bebb9d09028936b75ff5fcaffa091fcb3fd332dd76f55cff192047d199","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-04-06T00:28:57Z","title_canon_sha256":"a0e8e8411dddb8010952a4f3cc7e91c6d8f3e2d00d35df5ef208396f19a11d64"},"schema_version":"1.0","source":{"id":"1804.02088","kind":"arxiv","version":2}},"canonical_sha256":"6877ce3c7e787cc4547be00cd86be7998d653c2a9d86e86910052d154f973b99","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6877ce3c7e787cc4547be00cd86be7998d653c2a9d86e86910052d154f973b99","first_computed_at":"2026-05-18T00:10:21.893274Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:10:21.893274Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"d9gOwCaytHI8zg50FSrzYjKHw/ojbMXGKtHA+AblaAZjD2dt4Owbmms36+pIMu8YJNIKn3VsFB+enQx2pyxhBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:10:21.893914Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.02088","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5a71b84cbcb887d64a019f53cce28cfb167616cfb3c799ba3a687654ea0b68a8","sha256:6c854889682bd1a8a052b7c34c9fb0c85799c434017d033641205565c63ad46e"],"state_sha256":"5393299bd102bb8863255e54796ee1442b7c72ef7eeebcd39731aaa996a17b7e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HjvcaMS4l8KfPRYkyGqQffUh1YUsgeBTu/413EUsl9pvr0CowdqrzR+eD0axQBPloWApTaUGG7huX6g2qaMxDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T14:36:39.540288Z","bundle_sha256":"c8ba4f2b040e7324f32d43892dce4b0382c4bd031fbff3b28c42d66144d09e9f"}}