{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:PMOCJ5LLAS44JTWHHBYK4C5LN6","short_pith_number":"pith:PMOCJ5LL","canonical_record":{"source":{"id":"1707.07998","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-25T13:50:17Z","cross_cats_sorted":[],"title_canon_sha256":"24271257c3b8ca30d13b7b691bf38a2785aee4441f7e9cec0770ed89582263fb","abstract_canon_sha256":"f3fd517fabb8b2c190efd224cd3cf2bd3d4fe1d6ef2ead3b9c68fa427e872d7c"},"schema_version":"1.0"},"canonical_sha256":"7b1c24f56b04b9c4cec73870ae0bab6fa056b2bfcda218ba5e7bcbdc0d9c1aec","source":{"kind":"arxiv","id":"1707.07998","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.07998","created_at":"2026-05-18T00:21:04Z"},{"alias_kind":"arxiv_version","alias_value":"1707.07998v3","created_at":"2026-05-18T00:21:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.07998","created_at":"2026-05-18T00:21:04Z"},{"alias_kind":"pith_short_12","alias_value":"PMOCJ5LLAS44","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_16","alias_value":"PMOCJ5LLAS44JTWH","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_8","alias_value":"PMOCJ5LL","created_at":"2026-05-18T12:31:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:PMOCJ5LLAS44JTWHHBYK4C5LN6","target":"record","payload":{"canonical_record":{"source":{"id":"1707.07998","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-25T13:50:17Z","cross_cats_sorted":[],"title_canon_sha256":"24271257c3b8ca30d13b7b691bf38a2785aee4441f7e9cec0770ed89582263fb","abstract_canon_sha256":"f3fd517fabb8b2c190efd224cd3cf2bd3d4fe1d6ef2ead3b9c68fa427e872d7c"},"schema_version":"1.0"},"canonical_sha256":"7b1c24f56b04b9c4cec73870ae0bab6fa056b2bfcda218ba5e7bcbdc0d9c1aec","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:21:04.812709Z","signature_b64":"rSzEmLq8KATK0MFFRFijVFnB4l/164/jsRS6tu1WKlXtz5Zl+3DxxPBksb7Pce7G5SlWguYjBRvZN+WRTdmECQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7b1c24f56b04b9c4cec73870ae0bab6fa056b2bfcda218ba5e7bcbdc0d9c1aec","last_reissued_at":"2026-05-18T00:21:04.811983Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:21:04.811983Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.07998","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:21:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XFA8e0f6qp+ll/ikx9uZFfGyhAGwY3EVbLVBbxo+W3tFeoKzGJLbH6yC7wQmhVMdPeeLSuwJG5K935ib1gwjDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T08:00:59.015687Z"},"content_sha256":"d1f17f2ba732236b1a5d803e15d3f99c1efdcaa455f08104d72fc0393d50491a","schema_version":"1.0","event_id":"sha256:d1f17f2ba732236b1a5d803e15d3f99c1efdcaa455f08104d72fc0393d50491a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:PMOCJ5LLAS44JTWHHBYK4C5LN6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chris Buehler, Damien Teney, Lei Zhang, Mark Johnson, Peter Anderson, Stephen Gould, Xiaodong He","submitted_at":"2017-07-25T13:50:17Z","abstract_excerpt":"Top-down visual attention mechanisms have been used extensively in image captioning and visual question answering (VQA) to enable deeper image understanding through fine-grained analysis and even multiple steps of reasoning. In this work, we propose a combined bottom-up and top-down attention mechanism that enables attention to be calculated at the level of objects and other salient image regions. This is the natural basis for attention to be considered. Within our approach, the bottom-up mechanism (based on Faster R-CNN) proposes image regions, each with an associated feature vector, while th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.07998","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:21:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QOQwAdbACZ7MM7SvSqgLTJew9jgO7hWt9lp1jCALEDVt4lzUspi5IeTr7nmhZornRY5zsvpnBpfZKHtU4+SpBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T08:00:59.016147Z"},"content_sha256":"83f1539d727dcc9850ca4e2ec7cbd52e99fc794d0198ecb7b7cce8e42d0b5b90","schema_version":"1.0","event_id":"sha256:83f1539d727dcc9850ca4e2ec7cbd52e99fc794d0198ecb7b7cce8e42d0b5b90"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6/bundle.json","state_url":"https://pith.science/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T08:00:59Z","links":{"resolver":"https://pith.science/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6","bundle":"https://pith.science/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6/bundle.json","state":"https://pith.science/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PMOCJ5LLAS44JTWHHBYK4C5LN6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:PMOCJ5LLAS44JTWHHBYK4C5LN6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f3fd517fabb8b2c190efd224cd3cf2bd3d4fe1d6ef2ead3b9c68fa427e872d7c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-25T13:50:17Z","title_canon_sha256":"24271257c3b8ca30d13b7b691bf38a2785aee4441f7e9cec0770ed89582263fb"},"schema_version":"1.0","source":{"id":"1707.07998","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.07998","created_at":"2026-05-18T00:21:04Z"},{"alias_kind":"arxiv_version","alias_value":"1707.07998v3","created_at":"2026-05-18T00:21:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.07998","created_at":"2026-05-18T00:21:04Z"},{"alias_kind":"pith_short_12","alias_value":"PMOCJ5LLAS44","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_16","alias_value":"PMOCJ5LLAS44JTWH","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_8","alias_value":"PMOCJ5LL","created_at":"2026-05-18T12:31:37Z"}],"graph_snapshots":[{"event_id":"sha256:83f1539d727dcc9850ca4e2ec7cbd52e99fc794d0198ecb7b7cce8e42d0b5b90","target":"graph","created_at":"2026-05-18T00:21:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Top-down visual attention mechanisms have been used extensively in image captioning and visual question answering (VQA) to enable deeper image understanding through fine-grained analysis and even multiple steps of reasoning. In this work, we propose a combined bottom-up and top-down attention mechanism that enables attention to be calculated at the level of objects and other salient image regions. This is the natural basis for attention to be considered. Within our approach, the bottom-up mechanism (based on Faster R-CNN) proposes image regions, each with an associated feature vector, while th","authors_text":"Chris Buehler, Damien Teney, Lei Zhang, Mark Johnson, Peter Anderson, Stephen Gould, Xiaodong He","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-25T13:50:17Z","title":"Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.07998","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d1f17f2ba732236b1a5d803e15d3f99c1efdcaa455f08104d72fc0393d50491a","target":"record","created_at":"2026-05-18T00:21:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f3fd517fabb8b2c190efd224cd3cf2bd3d4fe1d6ef2ead3b9c68fa427e872d7c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-25T13:50:17Z","title_canon_sha256":"24271257c3b8ca30d13b7b691bf38a2785aee4441f7e9cec0770ed89582263fb"},"schema_version":"1.0","source":{"id":"1707.07998","kind":"arxiv","version":3}},"canonical_sha256":"7b1c24f56b04b9c4cec73870ae0bab6fa056b2bfcda218ba5e7bcbdc0d9c1aec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7b1c24f56b04b9c4cec73870ae0bab6fa056b2bfcda218ba5e7bcbdc0d9c1aec","first_computed_at":"2026-05-18T00:21:04.811983Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:21:04.811983Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"rSzEmLq8KATK0MFFRFijVFnB4l/164/jsRS6tu1WKlXtz5Zl+3DxxPBksb7Pce7G5SlWguYjBRvZN+WRTdmECQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:21:04.812709Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.07998","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d1f17f2ba732236b1a5d803e15d3f99c1efdcaa455f08104d72fc0393d50491a","sha256:83f1539d727dcc9850ca4e2ec7cbd52e99fc794d0198ecb7b7cce8e42d0b5b90"],"state_sha256":"ff73773a860d181eeddde0a8c917bcd506f2de62e1982be91758a69e7dce3528"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Jco5MeXG3ZUit3S+TWekZ3WjBN2OLCB2jm0a0VqyaKAHOe7XQb8Rwg6hXrRcYKnakIO8SL74btEhJiASS5xUAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T08:00:59.019351Z","bundle_sha256":"4e09cae024a1a38b7563ac3a6484d5a610f786e8d071af2ce51ae33534f0b538"}}