{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:FJFBUJVWJEKWH52PD2PWJWQYGM","short_pith_number":"pith:FJFBUJVW","schema_version":"1.0","canonical_sha256":"2a4a1a26b6491563f74f1e9f64da18330d1dce60159310dd8cf672e291dae684","source":{"kind":"arxiv","id":"1511.05676","version":1},"attestation_state":"computed","paper":{"title":"Compositional Memory for Visual Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Aiwen Jiang, Fang Wang, Fatih Porikli, Yi Li","submitted_at":"2015-11-18T07:25:16Z","abstract_excerpt":"Visual Question Answering (VQA) emerges as one of the most fascinating topics in computer vision recently. Many state of the art methods naively use holistic visual features with language features into a Long Short-Term Memory (LSTM) module, neglecting the sophisticated interaction between them. This coarse modeling also blocks the possibilities of exploring finer-grained local features that contribute to the question answering dynamically over time.\n  This paper addresses this fundamental problem by directly modeling the temporal dynamics between language and all possible local image patches."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.05676","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-11-18T07:25:16Z","cross_cats_sorted":[],"title_canon_sha256":"2ad2286bdc4fac5ec78a6efb2e22e6c58a31f4bc39c8461ca158d38bab5dd78e","abstract_canon_sha256":"b59cf33362f28718fd38ce102c3a3e6a7c734c66b211115f051c2fdee5498c64"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:26:33.070336Z","signature_b64":"iClob5Zp0qU2+ZxGmSYXXBJVkFx6J1N3wYyA22JOI7+j4iEcWArOJ3gLae+pJWi/tY+AIRHXsEDkBa53ElLHDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2a4a1a26b6491563f74f1e9f64da18330d1dce60159310dd8cf672e291dae684","last_reissued_at":"2026-05-18T01:26:33.069510Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:26:33.069510Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Compositional Memory for Visual Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Aiwen Jiang, Fang Wang, Fatih Porikli, Yi Li","submitted_at":"2015-11-18T07:25:16Z","abstract_excerpt":"Visual Question Answering (VQA) emerges as one of the most fascinating topics in computer vision recently. Many state of the art methods naively use holistic visual features with language features into a Long Short-Term Memory (LSTM) module, neglecting the sophisticated interaction between them. This coarse modeling also blocks the possibilities of exploring finer-grained local features that contribute to the question answering dynamically over time.\n  This paper addresses this fundamental problem by directly modeling the temporal dynamics between language and all possible local image patches."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.05676","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.05676","created_at":"2026-05-18T01:26:33.069648+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.05676v1","created_at":"2026-05-18T01:26:33.069648+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.05676","created_at":"2026-05-18T01:26:33.069648+00:00"},{"alias_kind":"pith_short_12","alias_value":"FJFBUJVWJEKW","created_at":"2026-05-18T12:29:19.899920+00:00"},{"alias_kind":"pith_short_16","alias_value":"FJFBUJVWJEKWH52P","created_at":"2026-05-18T12:29:19.899920+00:00"},{"alias_kind":"pith_short_8","alias_value":"FJFBUJVW","created_at":"2026-05-18T12:29:19.899920+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM","json":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM.json","graph_json":"https://pith.science/api/pith-number/FJFBUJVWJEKWH52PD2PWJWQYGM/graph.json","events_json":"https://pith.science/api/pith-number/FJFBUJVWJEKWH52PD2PWJWQYGM/events.json","paper":"https://pith.science/paper/FJFBUJVW"},"agent_actions":{"view_html":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM","download_json":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM.json","view_paper":"https://pith.science/paper/FJFBUJVW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.05676&json=true","fetch_graph":"https://pith.science/api/pith-number/FJFBUJVWJEKWH52PD2PWJWQYGM/graph.json","fetch_events":"https://pith.science/api/pith-number/FJFBUJVWJEKWH52PD2PWJWQYGM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM/action/storage_attestation","attest_author":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM/action/author_attestation","sign_citation":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM/action/citation_signature","submit_replication":"https://pith.science/pith/FJFBUJVWJEKWH52PD2PWJWQYGM/action/replication_record"}},"created_at":"2026-05-18T01:26:33.069648+00:00","updated_at":"2026-05-18T01:26:33.069648+00:00"}