{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:ZMW73WRQVPO4LNHMTTASZVCOL2","short_pith_number":"pith:ZMW73WRQ","canonical_record":{"source":{"id":"1709.07992","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-09-23T02:53:48Z","cross_cats_sorted":[],"title_canon_sha256":"9f6bb4ad5b49bcad50706dee702a41ae5d6aeb00110c1ae4208660082d63884e","abstract_canon_sha256":"79ace571d6361786697a0bf8f09a7ac8dc8a101e6898a0bfd3ac698302e9e402"},"schema_version":"1.0"},"canonical_sha256":"cb2dfdda30abddc5b4ec9cc12cd44e5ea9a3352c8935254a3e06e58fe817e339","source":{"kind":"arxiv","id":"1709.07992","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.07992","created_at":"2026-05-18T00:08:48Z"},{"alias_kind":"arxiv_version","alias_value":"1709.07992v3","created_at":"2026-05-18T00:08:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.07992","created_at":"2026-05-18T00:08:48Z"},{"alias_kind":"pith_short_12","alias_value":"ZMW73WRQVPO4","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZMW73WRQVPO4LNHM","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZMW73WRQ","created_at":"2026-05-18T12:31:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:ZMW73WRQVPO4LNHMTTASZVCOL2","target":"record","payload":{"canonical_record":{"source":{"id":"1709.07992","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-09-23T02:53:48Z","cross_cats_sorted":[],"title_canon_sha256":"9f6bb4ad5b49bcad50706dee702a41ae5d6aeb00110c1ae4208660082d63884e","abstract_canon_sha256":"79ace571d6361786697a0bf8f09a7ac8dc8a101e6898a0bfd3ac698302e9e402"},"schema_version":"1.0"},"canonical_sha256":"cb2dfdda30abddc5b4ec9cc12cd44e5ea9a3352c8935254a3e06e58fe817e339","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:08:48.458538Z","signature_b64":"oEnIfLaCkkVHa0gj4BaHIIVbzGGkRBMmlS2g61pUD9jLoLvydVNtun5VMifHr2kt+6jjBjv0obegqM1xRE/VCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cb2dfdda30abddc5b4ec9cc12cd44e5ea9a3352c8935254a3e06e58fe817e339","last_reissued_at":"2026-05-18T00:08:48.457992Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:08:48.457992Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.07992","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fzcSJi+0LFwGEmqKkVML8KXqR5dtthV2jWcuGE+UE2NscbN+h3pW/1Vu29wpLlx1Uyp/FwZDccXoR8s39ULNBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T14:52:56.221709Z"},"content_sha256":"39aa0baaaa1ab5177a5ee3a189289e0ad47cd1faeda30d51eab097a826ca0fc3","schema_version":"1.0","event_id":"sha256:39aa0baaaa1ab5177a5ee3a189289e0ad47cd1faeda30d51eab097a826ca0fc3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:ZMW73WRQVPO4LNHMTTASZVCOL2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Visual Reference Resolution using Attention Memory for Visual Dialog","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Andreas Lehrmann, Bohyung Han, Leonid Sigal, Paul Hongsuck Seo","submitted_at":"2017-09-23T02:53:48Z","abstract_excerpt":"Visual dialog is a task of answering a series of inter-dependent questions given an input image, and often requires to resolve visual references among the questions. This problem is different from visual question answering (VQA), which relies on spatial attention (a.k.a. visual grounding) estimated from an image and question pair. We propose a novel attention mechanism that exploits visual attentions in the past to resolve the current reference in the visual dialog scenario. The proposed model is equipped with an associative attention memory storing a sequence of previous (attention, key) pair"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.07992","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wTAxoHwhe0gLYrveNw8naYEJzhqLGvaAWSrA4kTaQaa5NAdMpF8B9xE7iGmbSHwZoa5UhdAQ6FaHipF+rRrKAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T14:52:56.222073Z"},"content_sha256":"4c2ab7b57831a7e12d751e92c120b9d88994cf315123227eb6a68f2bb2abdd3e","schema_version":"1.0","event_id":"sha256:4c2ab7b57831a7e12d751e92c120b9d88994cf315123227eb6a68f2bb2abdd3e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZMW73WRQVPO4LNHMTTASZVCOL2/bundle.json","state_url":"https://pith.science/pith/ZMW73WRQVPO4LNHMTTASZVCOL2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZMW73WRQVPO4LNHMTTASZVCOL2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T14:52:56Z","links":{"resolver":"https://pith.science/pith/ZMW73WRQVPO4LNHMTTASZVCOL2","bundle":"https://pith.science/pith/ZMW73WRQVPO4LNHMTTASZVCOL2/bundle.json","state":"https://pith.science/pith/ZMW73WRQVPO4LNHMTTASZVCOL2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZMW73WRQVPO4LNHMTTASZVCOL2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ZMW73WRQVPO4LNHMTTASZVCOL2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"79ace571d6361786697a0bf8f09a7ac8dc8a101e6898a0bfd3ac698302e9e402","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-09-23T02:53:48Z","title_canon_sha256":"9f6bb4ad5b49bcad50706dee702a41ae5d6aeb00110c1ae4208660082d63884e"},"schema_version":"1.0","source":{"id":"1709.07992","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.07992","created_at":"2026-05-18T00:08:48Z"},{"alias_kind":"arxiv_version","alias_value":"1709.07992v3","created_at":"2026-05-18T00:08:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.07992","created_at":"2026-05-18T00:08:48Z"},{"alias_kind":"pith_short_12","alias_value":"ZMW73WRQVPO4","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZMW73WRQVPO4LNHM","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZMW73WRQ","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:4c2ab7b57831a7e12d751e92c120b9d88994cf315123227eb6a68f2bb2abdd3e","target":"graph","created_at":"2026-05-18T00:08:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Visual dialog is a task of answering a series of inter-dependent questions given an input image, and often requires to resolve visual references among the questions. This problem is different from visual question answering (VQA), which relies on spatial attention (a.k.a. visual grounding) estimated from an image and question pair. We propose a novel attention mechanism that exploits visual attentions in the past to resolve the current reference in the visual dialog scenario. The proposed model is equipped with an associative attention memory storing a sequence of previous (attention, key) pair","authors_text":"Andreas Lehrmann, Bohyung Han, Leonid Sigal, Paul Hongsuck Seo","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-09-23T02:53:48Z","title":"Visual Reference Resolution using Attention Memory for Visual Dialog"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.07992","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:39aa0baaaa1ab5177a5ee3a189289e0ad47cd1faeda30d51eab097a826ca0fc3","target":"record","created_at":"2026-05-18T00:08:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"79ace571d6361786697a0bf8f09a7ac8dc8a101e6898a0bfd3ac698302e9e402","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-09-23T02:53:48Z","title_canon_sha256":"9f6bb4ad5b49bcad50706dee702a41ae5d6aeb00110c1ae4208660082d63884e"},"schema_version":"1.0","source":{"id":"1709.07992","kind":"arxiv","version":3}},"canonical_sha256":"cb2dfdda30abddc5b4ec9cc12cd44e5ea9a3352c8935254a3e06e58fe817e339","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cb2dfdda30abddc5b4ec9cc12cd44e5ea9a3352c8935254a3e06e58fe817e339","first_computed_at":"2026-05-18T00:08:48.457992Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:08:48.457992Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"oEnIfLaCkkVHa0gj4BaHIIVbzGGkRBMmlS2g61pUD9jLoLvydVNtun5VMifHr2kt+6jjBjv0obegqM1xRE/VCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:08:48.458538Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.07992","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:39aa0baaaa1ab5177a5ee3a189289e0ad47cd1faeda30d51eab097a826ca0fc3","sha256:4c2ab7b57831a7e12d751e92c120b9d88994cf315123227eb6a68f2bb2abdd3e"],"state_sha256":"8a1fc0bd7c0c455cd27f1761cafbd4c8b53630f6fe7b94548ef385a76c09c81e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"w8ydDuc7ye5OO4ujUagpwE/jNJvMVt5DuVJyMXexYZr5atmralve/VP0roPUQtV//UUrVEXruVMxeHnAMu8GDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T14:52:56.224122Z","bundle_sha256":"3cea89a289029d491f435fef9bfc2ee42ed3ac9b4bdb351c66b64ab57c28edb3"}}