{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:VUEA2TJGUY45FJCCD3NC4MKYU2","short_pith_number":"pith:VUEA2TJG","canonical_record":{"source":{"id":"1906.01787","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-05T02:24:12Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"e0ce9bc302885d5a1bb8ce6b46c559a9811d61d181823bd0185713abdf7617c2","abstract_canon_sha256":"3401233289da564ff6627bb43e1c90d330ea421c66ac33a73dc5fce092fbeeb2"},"schema_version":"1.0"},"canonical_sha256":"ad080d4d26a639d2a4421eda2e3158a6ba7d6202c9c3d2cb70a3e75f31edad41","source":{"kind":"arxiv","id":"1906.01787","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.01787","created_at":"2026-05-17T23:44:06Z"},{"alias_kind":"arxiv_version","alias_value":"1906.01787v1","created_at":"2026-05-17T23:44:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.01787","created_at":"2026-05-17T23:44:06Z"},{"alias_kind":"pith_short_12","alias_value":"VUEA2TJGUY45","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"VUEA2TJGUY45FJCC","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"VUEA2TJG","created_at":"2026-05-18T12:33:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:VUEA2TJGUY45FJCCD3NC4MKYU2","target":"record","payload":{"canonical_record":{"source":{"id":"1906.01787","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-05T02:24:12Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"e0ce9bc302885d5a1bb8ce6b46c559a9811d61d181823bd0185713abdf7617c2","abstract_canon_sha256":"3401233289da564ff6627bb43e1c90d330ea421c66ac33a73dc5fce092fbeeb2"},"schema_version":"1.0"},"canonical_sha256":"ad080d4d26a639d2a4421eda2e3158a6ba7d6202c9c3d2cb70a3e75f31edad41","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:06.233808Z","signature_b64":"tCDKva626BLkXoppT7SCI1A3zec6h4scFBVF6Y4LEnaZh4E9ha5KrWIp00Cb6+XdD/T/3nodcbPu0+Pm9gfqBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ad080d4d26a639d2a4421eda2e3158a6ba7d6202c9c3d2cb70a3e75f31edad41","last_reissued_at":"2026-05-17T23:44:06.233179Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:06.233179Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.01787","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3sj9Zl0pHOqsXW3BSVpfa7NIziNnAQFtCXgrujSSxYUoVBhFpiWWjAWKE2+TlSKMH/Q5r6QAM5o5HtWxrUIXBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T02:00:12.530562Z"},"content_sha256":"02cb8cd08099343720f2638111337ed7a1661606a03cd9160252ed671c9c7d9b","schema_version":"1.0","event_id":"sha256:02cb8cd08099343720f2638111337ed7a1661606a03cd9160252ed671c9c7d9b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:VUEA2TJGUY45FJCCD3NC4MKYU2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Deep Transformer Models for Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Bei Li, Changliang Li, Derek F. Wong, Jingbo Zhu, Lidia S. Chao, Qiang Wang, Tong Xiao","submitted_at":"2019-06-05T02:24:12Z","abstract_excerpt":"Transformer is the state-of-the-art model in recent machine translation evaluations. Two strands of research are promising to improve models of this kind: the first uses wide networks (a.k.a. Transformer-Big) and has been the de facto standard for the development of the Transformer system, and the other uses deeper language representation but faces the difficulty arising from learning deep networks. Here, we continue the line of research on the latter. We claim that a truly deep Transformer model can surpass the Transformer-Big counterpart by 1) proper use of layer normalization and 2) a novel"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.01787","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+rg7iUzTMp48OMhBAissFzOZETPyiJBZLY3ZrB4NgGAF7s0peZM3JHEaK+1ENBUpchZhmAd53FlhOE24WRf4Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T02:00:12.530923Z"},"content_sha256":"0c7c74d5c70619f65d9b01d0d5c9d66bdfe7645c338f9c20b4b2eaea8c282b24","schema_version":"1.0","event_id":"sha256:0c7c74d5c70619f65d9b01d0d5c9d66bdfe7645c338f9c20b4b2eaea8c282b24"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VUEA2TJGUY45FJCCD3NC4MKYU2/bundle.json","state_url":"https://pith.science/pith/VUEA2TJGUY45FJCCD3NC4MKYU2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VUEA2TJGUY45FJCCD3NC4MKYU2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T02:00:12Z","links":{"resolver":"https://pith.science/pith/VUEA2TJGUY45FJCCD3NC4MKYU2","bundle":"https://pith.science/pith/VUEA2TJGUY45FJCCD3NC4MKYU2/bundle.json","state":"https://pith.science/pith/VUEA2TJGUY45FJCCD3NC4MKYU2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VUEA2TJGUY45FJCCD3NC4MKYU2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:VUEA2TJGUY45FJCCD3NC4MKYU2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3401233289da564ff6627bb43e1c90d330ea421c66ac33a73dc5fce092fbeeb2","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-05T02:24:12Z","title_canon_sha256":"e0ce9bc302885d5a1bb8ce6b46c559a9811d61d181823bd0185713abdf7617c2"},"schema_version":"1.0","source":{"id":"1906.01787","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.01787","created_at":"2026-05-17T23:44:06Z"},{"alias_kind":"arxiv_version","alias_value":"1906.01787v1","created_at":"2026-05-17T23:44:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.01787","created_at":"2026-05-17T23:44:06Z"},{"alias_kind":"pith_short_12","alias_value":"VUEA2TJGUY45","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"VUEA2TJGUY45FJCC","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"VUEA2TJG","created_at":"2026-05-18T12:33:30Z"}],"graph_snapshots":[{"event_id":"sha256:0c7c74d5c70619f65d9b01d0d5c9d66bdfe7645c338f9c20b4b2eaea8c282b24","target":"graph","created_at":"2026-05-17T23:44:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Transformer is the state-of-the-art model in recent machine translation evaluations. Two strands of research are promising to improve models of this kind: the first uses wide networks (a.k.a. Transformer-Big) and has been the de facto standard for the development of the Transformer system, and the other uses deeper language representation but faces the difficulty arising from learning deep networks. Here, we continue the line of research on the latter. We claim that a truly deep Transformer model can surpass the Transformer-Big counterpart by 1) proper use of layer normalization and 2) a novel","authors_text":"Bei Li, Changliang Li, Derek F. Wong, Jingbo Zhu, Lidia S. Chao, Qiang Wang, Tong Xiao","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-05T02:24:12Z","title":"Learning Deep Transformer Models for Machine Translation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.01787","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:02cb8cd08099343720f2638111337ed7a1661606a03cd9160252ed671c9c7d9b","target":"record","created_at":"2026-05-17T23:44:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3401233289da564ff6627bb43e1c90d330ea421c66ac33a73dc5fce092fbeeb2","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-05T02:24:12Z","title_canon_sha256":"e0ce9bc302885d5a1bb8ce6b46c559a9811d61d181823bd0185713abdf7617c2"},"schema_version":"1.0","source":{"id":"1906.01787","kind":"arxiv","version":1}},"canonical_sha256":"ad080d4d26a639d2a4421eda2e3158a6ba7d6202c9c3d2cb70a3e75f31edad41","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ad080d4d26a639d2a4421eda2e3158a6ba7d6202c9c3d2cb70a3e75f31edad41","first_computed_at":"2026-05-17T23:44:06.233179Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:06.233179Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tCDKva626BLkXoppT7SCI1A3zec6h4scFBVF6Y4LEnaZh4E9ha5KrWIp00Cb6+XdD/T/3nodcbPu0+Pm9gfqBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:06.233808Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.01787","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:02cb8cd08099343720f2638111337ed7a1661606a03cd9160252ed671c9c7d9b","sha256:0c7c74d5c70619f65d9b01d0d5c9d66bdfe7645c338f9c20b4b2eaea8c282b24"],"state_sha256":"ce8b716150ecc9f4f5ac146c7254b08ae64e8ad72943bcd626ee68843d0c3646"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XjEPqHn54DPX96ljYPUo2NxBvCPgeDbo1Ztp701dRBkwda72hjyQXD0VVaoeG3OMwbWrpmH1j08TajzdiiUgDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T02:00:12.533924Z","bundle_sha256":"0a7f8e93c1fc86d987590cd58b15c22ec6964c750578d3ae04db39acd62906a3"}}