{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2020:EIKSSXR2HKSXSJFF6I324W4DZ3","short_pith_number":"pith:EIKSSXR2","canonical_record":{"source":{"id":"2012.07061","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2020-12-13T13:38:58Z","cross_cats_sorted":[],"title_canon_sha256":"674591b2de35565f2428d9c45ba0a855e8b393544a3727e7eec725c281d2bf28","abstract_canon_sha256":"3a9ebf1fde683545ed1387e12b19ddf28471db7645ef24f47f2df9ece142ef6f"},"schema_version":"1.0"},"canonical_sha256":"2215295e3a3aa57924a5f237ae5b83cec92483429578dba34944fc62494a4be3","source":{"kind":"arxiv","id":"2012.07061","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2012.07061","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"arxiv_version","alias_value":"2012.07061v1","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2012.07061","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"pith_short_12","alias_value":"EIKSSXR2HKSX","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"pith_short_16","alias_value":"EIKSSXR2HKSXSJFF","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"pith_short_8","alias_value":"EIKSSXR2","created_at":"2026-07-05T01:59:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2020:EIKSSXR2HKSXSJFF6I324W4DZ3","target":"record","payload":{"canonical_record":{"source":{"id":"2012.07061","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2020-12-13T13:38:58Z","cross_cats_sorted":[],"title_canon_sha256":"674591b2de35565f2428d9c45ba0a855e8b393544a3727e7eec725c281d2bf28","abstract_canon_sha256":"3a9ebf1fde683545ed1387e12b19ddf28471db7645ef24f47f2df9ece142ef6f"},"schema_version":"1.0"},"canonical_sha256":"2215295e3a3aa57924a5f237ae5b83cec92483429578dba34944fc62494a4be3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T01:59:16.875520Z","signature_b64":"n2v2F8mIcQjzPiYtiJj12E2/01fCujcfHV0zRsjMqesnrbH11ZVpB/YYJ0IxfbevFaO0zclg6nTnOrT8GxnyCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2215295e3a3aa57924a5f237ae5b83cec92483429578dba34944fc62494a4be3","last_reissued_at":"2026-07-05T01:59:16.875072Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T01:59:16.875072Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2012.07061","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T01:59:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PDnmouym0VvJzluzJJXMCO4IDR6dMOthsYqvt5UQ6n20nuzAUmvKcecqti6SIT9MtxY8TafyVUZNNxhF50P2Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:08:03.223693Z"},"content_sha256":"7ca7539e546ab24360a1a112174244fa6bdb0f35d6d19f3347927e136b0f7ab3","schema_version":"1.0","event_id":"sha256:7ca7539e546ab24360a1a112174244fa6bdb0f35d6d19f3347927e136b0f7ab3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2020:EIKSSXR2HKSXSJFF6I324W4DZ3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improving Image Captioning by Leveraging Intra- and Inter-layer Global Representation in Transformer Network","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Fuhai Chen, Gen Luo, Jiayi Ji, Rongrong Ji, Xiaoshuai Sun, Yongjian Wu, Yue Gao, Yunpeng Luo","submitted_at":"2020-12-13T13:38:58Z","abstract_excerpt":"Transformer-based architectures have shown great success in image captioning, where object regions are encoded and then attended into the vectorial representations to guide the caption decoding. However, such vectorial representations only contain region-level information without considering the global information reflecting the entire image, which fails to expand the capability of complex multi-modal reasoning in image captioning. In this paper, we introduce a Global Enhanced Transformer (termed GET) to enable the extraction of a more comprehensive global representation, and then adaptively g"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2012.07061","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2012.07061/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T01:59:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DHbrqEbmQZ3NulopeNcvXU99m1UxB7EfArfGlOb3HPUdmJgofrJ3GMEhSk8gvqSJ8u0U6Qo3W4XFXqtIcQzEDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:08:03.224069Z"},"content_sha256":"890f879222149551e0565e2ce3224d386a0c195a2111a47aeeb89965b6ec1c9c","schema_version":"1.0","event_id":"sha256:890f879222149551e0565e2ce3224d386a0c195a2111a47aeeb89965b6ec1c9c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EIKSSXR2HKSXSJFF6I324W4DZ3/bundle.json","state_url":"https://pith.science/pith/EIKSSXR2HKSXSJFF6I324W4DZ3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EIKSSXR2HKSXSJFF6I324W4DZ3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T09:08:03Z","links":{"resolver":"https://pith.science/pith/EIKSSXR2HKSXSJFF6I324W4DZ3","bundle":"https://pith.science/pith/EIKSSXR2HKSXSJFF6I324W4DZ3/bundle.json","state":"https://pith.science/pith/EIKSSXR2HKSXSJFF6I324W4DZ3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EIKSSXR2HKSXSJFF6I324W4DZ3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:EIKSSXR2HKSXSJFF6I324W4DZ3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3a9ebf1fde683545ed1387e12b19ddf28471db7645ef24f47f2df9ece142ef6f","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2020-12-13T13:38:58Z","title_canon_sha256":"674591b2de35565f2428d9c45ba0a855e8b393544a3727e7eec725c281d2bf28"},"schema_version":"1.0","source":{"id":"2012.07061","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2012.07061","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"arxiv_version","alias_value":"2012.07061v1","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2012.07061","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"pith_short_12","alias_value":"EIKSSXR2HKSX","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"pith_short_16","alias_value":"EIKSSXR2HKSXSJFF","created_at":"2026-07-05T01:59:16Z"},{"alias_kind":"pith_short_8","alias_value":"EIKSSXR2","created_at":"2026-07-05T01:59:16Z"}],"graph_snapshots":[{"event_id":"sha256:890f879222149551e0565e2ce3224d386a0c195a2111a47aeeb89965b6ec1c9c","target":"graph","created_at":"2026-07-05T01:59:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2012.07061/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Transformer-based architectures have shown great success in image captioning, where object regions are encoded and then attended into the vectorial representations to guide the caption decoding. However, such vectorial representations only contain region-level information without considering the global information reflecting the entire image, which fails to expand the capability of complex multi-modal reasoning in image captioning. In this paper, we introduce a Global Enhanced Transformer (termed GET) to enable the extraction of a more comprehensive global representation, and then adaptively g","authors_text":"Fuhai Chen, Gen Luo, Jiayi Ji, Rongrong Ji, Xiaoshuai Sun, Yongjian Wu, Yue Gao, Yunpeng Luo","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2020-12-13T13:38:58Z","title":"Improving Image Captioning by Leveraging Intra- and Inter-layer Global Representation in Transformer Network"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2012.07061","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7ca7539e546ab24360a1a112174244fa6bdb0f35d6d19f3347927e136b0f7ab3","target":"record","created_at":"2026-07-05T01:59:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3a9ebf1fde683545ed1387e12b19ddf28471db7645ef24f47f2df9ece142ef6f","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2020-12-13T13:38:58Z","title_canon_sha256":"674591b2de35565f2428d9c45ba0a855e8b393544a3727e7eec725c281d2bf28"},"schema_version":"1.0","source":{"id":"2012.07061","kind":"arxiv","version":1}},"canonical_sha256":"2215295e3a3aa57924a5f237ae5b83cec92483429578dba34944fc62494a4be3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2215295e3a3aa57924a5f237ae5b83cec92483429578dba34944fc62494a4be3","first_computed_at":"2026-07-05T01:59:16.875072Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T01:59:16.875072Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"n2v2F8mIcQjzPiYtiJj12E2/01fCujcfHV0zRsjMqesnrbH11ZVpB/YYJ0IxfbevFaO0zclg6nTnOrT8GxnyCg==","signature_status":"signed_v1","signed_at":"2026-07-05T01:59:16.875520Z","signed_message":"canonical_sha256_bytes"},"source_id":"2012.07061","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7ca7539e546ab24360a1a112174244fa6bdb0f35d6d19f3347927e136b0f7ab3","sha256:890f879222149551e0565e2ce3224d386a0c195a2111a47aeeb89965b6ec1c9c"],"state_sha256":"3d7ec7bdd2d289c744fc1385fb41a633bf38ba3bcf95056ed8fc1430d8564539"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IDIlM7YZKh09fGdA+nvD3Wi/zJpIW7zOcdXzrXRRlLTDWuvfOo3ZKvXCLgcZZi2nHY40NmNJw37h6CKsOC4ZCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T09:08:03.226125Z","bundle_sha256":"82f050033e2e46c5d047818aeb8a81494c77c2609a7aafa145c69ae48f7bf0a8"}}