{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:FMDGLBOPJP5MS3CXYU6Q2SPD6G","short_pith_number":"pith:FMDGLBOP","canonical_record":{"source":{"id":"1702.05658","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-02-18T21:35:06Z","cross_cats_sorted":[],"title_canon_sha256":"dfb218df281974dda68d9439f672e608e9d52b72ba0928012e34bb87fb16938a","abstract_canon_sha256":"af064ec8c942d79a508df2c76df3a28ccc0c63ccd2c72a6060c99635aace8aed"},"schema_version":"1.0"},"canonical_sha256":"2b066585cf4bfac96c57c53d0d49e3f180511f0b45c4b337b8744c523bc18e1f","source":{"kind":"arxiv","id":"1702.05658","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1702.05658","created_at":"2026-05-18T00:38:18Z"},{"alias_kind":"arxiv_version","alias_value":"1702.05658v3","created_at":"2026-05-18T00:38:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.05658","created_at":"2026-05-18T00:38:18Z"},{"alias_kind":"pith_short_12","alias_value":"FMDGLBOPJP5M","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"FMDGLBOPJP5MS3CX","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"FMDGLBOP","created_at":"2026-05-18T12:31:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:FMDGLBOPJP5MS3CXYU6Q2SPD6G","target":"record","payload":{"canonical_record":{"source":{"id":"1702.05658","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-02-18T21:35:06Z","cross_cats_sorted":[],"title_canon_sha256":"dfb218df281974dda68d9439f672e608e9d52b72ba0928012e34bb87fb16938a","abstract_canon_sha256":"af064ec8c942d79a508df2c76df3a28ccc0c63ccd2c72a6060c99635aace8aed"},"schema_version":"1.0"},"canonical_sha256":"2b066585cf4bfac96c57c53d0d49e3f180511f0b45c4b337b8744c523bc18e1f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:38:18.259767Z","signature_b64":"i6oxjZTwL5TqORrEDGR/vnQ3sRSmowkBc+KnOWBVQ9eyIsGMPUht7yRBcm45L5Yf3eDiOxzWni3B+FiOqpJRDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2b066585cf4bfac96c57c53d0d49e3f180511f0b45c4b337b8744c523bc18e1f","last_reissued_at":"2026-05-18T00:38:18.259042Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:38:18.259042Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1702.05658","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:38:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"w1yg+9M8MDAeKiDCehBJ5I5Rqw81qb6j44Uvo/RSlykysjkPVuL3ZHLPLmMDbt9aWpHq490H3xijHaEg0wdlAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T06:31:46.862902Z"},"content_sha256":"67a5fae137c32d6704f830e2840054a3df673b9fb0fecbd7f96de3d5e7ed0a0a","schema_version":"1.0","event_id":"sha256:67a5fae137c32d6704f830e2840054a3df673b9fb0fecbd7f96de3d5e7ed0a0a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:FMDGLBOPJP5MS3CXYU6Q2SPD6G","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MAT: A Multimodal Attentive Translator for Image Captioning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Alan Yuille, Changhu Wang, Chang Liu, Feng Wang, Fuchun Sun","submitted_at":"2017-02-18T21:35:06Z","abstract_excerpt":"In this work we formulate the problem of image captioning as a multimodal translation task. Analogous to machine translation, we present a sequence-to-sequence recurrent neural networks (RNN) model for image caption generation. Different from most existing work where the whole image is represented by convolutional neural network (CNN) feature, we propose to represent the input image as a sequence of detected objects which feeds as the source sequence of the RNN model. In this way, the sequential representation of an image can be naturally translated to a sequence of words, as the target sequen"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.05658","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:38:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fnu9fJV2hNUoYws4Q9VYazWkLKYhgwWEVlgEu6b9SONUO+/mb3nKvglWU0JecB3UolRsxglTAKqErpE6kbXRBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T06:31:46.863542Z"},"content_sha256":"eba2f04c7b0cf5e41b5c9ad364c716d157695b26e478519686246d4dd6b797be","schema_version":"1.0","event_id":"sha256:eba2f04c7b0cf5e41b5c9ad364c716d157695b26e478519686246d4dd6b797be"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G/bundle.json","state_url":"https://pith.science/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T06:31:46Z","links":{"resolver":"https://pith.science/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G","bundle":"https://pith.science/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G/bundle.json","state":"https://pith.science/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FMDGLBOPJP5MS3CXYU6Q2SPD6G/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:FMDGLBOPJP5MS3CXYU6Q2SPD6G","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"af064ec8c942d79a508df2c76df3a28ccc0c63ccd2c72a6060c99635aace8aed","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-02-18T21:35:06Z","title_canon_sha256":"dfb218df281974dda68d9439f672e608e9d52b72ba0928012e34bb87fb16938a"},"schema_version":"1.0","source":{"id":"1702.05658","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1702.05658","created_at":"2026-05-18T00:38:18Z"},{"alias_kind":"arxiv_version","alias_value":"1702.05658v3","created_at":"2026-05-18T00:38:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.05658","created_at":"2026-05-18T00:38:18Z"},{"alias_kind":"pith_short_12","alias_value":"FMDGLBOPJP5M","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"FMDGLBOPJP5MS3CX","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"FMDGLBOP","created_at":"2026-05-18T12:31:15Z"}],"graph_snapshots":[{"event_id":"sha256:eba2f04c7b0cf5e41b5c9ad364c716d157695b26e478519686246d4dd6b797be","target":"graph","created_at":"2026-05-18T00:38:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this work we formulate the problem of image captioning as a multimodal translation task. Analogous to machine translation, we present a sequence-to-sequence recurrent neural networks (RNN) model for image caption generation. Different from most existing work where the whole image is represented by convolutional neural network (CNN) feature, we propose to represent the input image as a sequence of detected objects which feeds as the source sequence of the RNN model. In this way, the sequential representation of an image can be naturally translated to a sequence of words, as the target sequen","authors_text":"Alan Yuille, Changhu Wang, Chang Liu, Feng Wang, Fuchun Sun","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-02-18T21:35:06Z","title":"MAT: A Multimodal Attentive Translator for Image Captioning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.05658","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:67a5fae137c32d6704f830e2840054a3df673b9fb0fecbd7f96de3d5e7ed0a0a","target":"record","created_at":"2026-05-18T00:38:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"af064ec8c942d79a508df2c76df3a28ccc0c63ccd2c72a6060c99635aace8aed","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-02-18T21:35:06Z","title_canon_sha256":"dfb218df281974dda68d9439f672e608e9d52b72ba0928012e34bb87fb16938a"},"schema_version":"1.0","source":{"id":"1702.05658","kind":"arxiv","version":3}},"canonical_sha256":"2b066585cf4bfac96c57c53d0d49e3f180511f0b45c4b337b8744c523bc18e1f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2b066585cf4bfac96c57c53d0d49e3f180511f0b45c4b337b8744c523bc18e1f","first_computed_at":"2026-05-18T00:38:18.259042Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:38:18.259042Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"i6oxjZTwL5TqORrEDGR/vnQ3sRSmowkBc+KnOWBVQ9eyIsGMPUht7yRBcm45L5Yf3eDiOxzWni3B+FiOqpJRDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:38:18.259767Z","signed_message":"canonical_sha256_bytes"},"source_id":"1702.05658","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:67a5fae137c32d6704f830e2840054a3df673b9fb0fecbd7f96de3d5e7ed0a0a","sha256:eba2f04c7b0cf5e41b5c9ad364c716d157695b26e478519686246d4dd6b797be"],"state_sha256":"1f4884c95536dd38a74c684b23af048011aaf9c2874b510c69ed8d63ca5da3bf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MlFTqGGgEOWayHqYsdCpDcep/lr5OVy45KQUw8asiNz4PDQlQuCVEK8RPSciLRiSigrAYRBRNHAQwpXYYpFODw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T06:31:46.866904Z","bundle_sha256":"2ec6b40dd6f940bf3fea712bd567e7231693b80fee8c5e2e961e3c92dae4c30e"}}