{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:54U5RVL6WBFRSGTIGTESFNCOFB","short_pith_number":"pith:54U5RVL6","canonical_record":{"source":{"id":"1811.09021","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-11-22T04:37:55Z","cross_cats_sorted":["cs.CL","cs.LG","cs.SD"],"title_canon_sha256":"8c55c558f2b563e588c6ba937c3c8375ab93ad0d604978c074f90c780a80524a","abstract_canon_sha256":"5976472841a98cc7c16d9e0063602ebb49ce4946b79467a0af865e5bd8ab42c7"},"schema_version":"1.0"},"canonical_sha256":"ef29d8d57eb04b191a6834c922b44e28451f51a25c7317059bd9f292feb33b87","source":{"kind":"arxiv","id":"1811.09021","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.09021","created_at":"2026-05-17T23:59:57Z"},{"alias_kind":"arxiv_version","alias_value":"1811.09021v1","created_at":"2026-05-17T23:59:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.09021","created_at":"2026-05-17T23:59:57Z"},{"alias_kind":"pith_short_12","alias_value":"54U5RVL6WBFR","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"54U5RVL6WBFRSGTI","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"54U5RVL6","created_at":"2026-05-18T12:32:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:54U5RVL6WBFRSGTIGTESFNCOFB","target":"record","payload":{"canonical_record":{"source":{"id":"1811.09021","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-11-22T04:37:55Z","cross_cats_sorted":["cs.CL","cs.LG","cs.SD"],"title_canon_sha256":"8c55c558f2b563e588c6ba937c3c8375ab93ad0d604978c074f90c780a80524a","abstract_canon_sha256":"5976472841a98cc7c16d9e0063602ebb49ce4946b79467a0af865e5bd8ab42c7"},"schema_version":"1.0"},"canonical_sha256":"ef29d8d57eb04b191a6834c922b44e28451f51a25c7317059bd9f292feb33b87","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:57.387375Z","signature_b64":"hNeEMknY/WZddLtu1mNyerVTas3ZHK3MtQ6vm+BxKeEJz2fXeUvGM6dTve+RR+jydxx2hTFCxqQ9x0EnTEqABA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ef29d8d57eb04b191a6834c922b44e28451f51a25c7317059bd9f292feb33b87","last_reissued_at":"2026-05-17T23:59:57.386828Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:57.386828Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.09021","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xP9rINaEYPLN3O6UH2sWl8dk9sFlkg2lvvPosew3dCc3FnrLekrtzY04qXoDhveXu4NhC4cU0f3LzXqBmPflBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T06:10:03.752096Z"},"content_sha256":"392dd76dcaab294b67d4bed662804d05ca88246113ac433dc95c6a7eb5fd9032","schema_version":"1.0","event_id":"sha256:392dd76dcaab294b67d4bed662804d05ca88246113ac433dc95c6a7eb5fd9032"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:54U5RVL6WBFRSGTIGTESFNCOFB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bytes are All You Need: End-to-End Multilingual Speech Recognition and Synthesis with Bytes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG","cs.SD"],"primary_cat":"eess.AS","authors_text":"Bo Li, Tara Sainath, William Chan, Yonghui Wu, Yu Zhang","submitted_at":"2018-11-22T04:37:55Z","abstract_excerpt":"We present two end-to-end models: Audio-to-Byte (A2B) and Byte-to-Audio (B2A), for multilingual speech recognition and synthesis. Prior work has predominantly used characters, sub-words or words as the unit of choice to model text. These units are difficult to scale to languages with large vocabularies, particularly in the case of multilingual processing. In this work, we model text via a sequence of Unicode bytes, specifically, the UTF-8 variable length byte sequence for each character. Bytes allow us to avoid large softmaxes in languages with large vocabularies, and share representations in "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.09021","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bQZyTzKQZc1y33cBWDFpTTUo42oJ2Y2YHC/f74P9YMwQW5J9jG/Jo5ESdJIEg+1xxM7hWHxhnlmnw95F0RirBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T06:10:03.752465Z"},"content_sha256":"b395393ebb8d6e859aa3997dc5323f57dbd50f0db246d6bfe3763db680df72ee","schema_version":"1.0","event_id":"sha256:b395393ebb8d6e859aa3997dc5323f57dbd50f0db246d6bfe3763db680df72ee"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/54U5RVL6WBFRSGTIGTESFNCOFB/bundle.json","state_url":"https://pith.science/pith/54U5RVL6WBFRSGTIGTESFNCOFB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/54U5RVL6WBFRSGTIGTESFNCOFB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T06:10:03Z","links":{"resolver":"https://pith.science/pith/54U5RVL6WBFRSGTIGTESFNCOFB","bundle":"https://pith.science/pith/54U5RVL6WBFRSGTIGTESFNCOFB/bundle.json","state":"https://pith.science/pith/54U5RVL6WBFRSGTIGTESFNCOFB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/54U5RVL6WBFRSGTIGTESFNCOFB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:54U5RVL6WBFRSGTIGTESFNCOFB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5976472841a98cc7c16d9e0063602ebb49ce4946b79467a0af865e5bd8ab42c7","cross_cats_sorted":["cs.CL","cs.LG","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-11-22T04:37:55Z","title_canon_sha256":"8c55c558f2b563e588c6ba937c3c8375ab93ad0d604978c074f90c780a80524a"},"schema_version":"1.0","source":{"id":"1811.09021","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.09021","created_at":"2026-05-17T23:59:57Z"},{"alias_kind":"arxiv_version","alias_value":"1811.09021v1","created_at":"2026-05-17T23:59:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.09021","created_at":"2026-05-17T23:59:57Z"},{"alias_kind":"pith_short_12","alias_value":"54U5RVL6WBFR","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"54U5RVL6WBFRSGTI","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"54U5RVL6","created_at":"2026-05-18T12:32:05Z"}],"graph_snapshots":[{"event_id":"sha256:b395393ebb8d6e859aa3997dc5323f57dbd50f0db246d6bfe3763db680df72ee","target":"graph","created_at":"2026-05-17T23:59:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present two end-to-end models: Audio-to-Byte (A2B) and Byte-to-Audio (B2A), for multilingual speech recognition and synthesis. Prior work has predominantly used characters, sub-words or words as the unit of choice to model text. These units are difficult to scale to languages with large vocabularies, particularly in the case of multilingual processing. In this work, we model text via a sequence of Unicode bytes, specifically, the UTF-8 variable length byte sequence for each character. Bytes allow us to avoid large softmaxes in languages with large vocabularies, and share representations in ","authors_text":"Bo Li, Tara Sainath, William Chan, Yonghui Wu, Yu Zhang","cross_cats":["cs.CL","cs.LG","cs.SD"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-11-22T04:37:55Z","title":"Bytes are All You Need: End-to-End Multilingual Speech Recognition and Synthesis with Bytes"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.09021","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:392dd76dcaab294b67d4bed662804d05ca88246113ac433dc95c6a7eb5fd9032","target":"record","created_at":"2026-05-17T23:59:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5976472841a98cc7c16d9e0063602ebb49ce4946b79467a0af865e5bd8ab42c7","cross_cats_sorted":["cs.CL","cs.LG","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-11-22T04:37:55Z","title_canon_sha256":"8c55c558f2b563e588c6ba937c3c8375ab93ad0d604978c074f90c780a80524a"},"schema_version":"1.0","source":{"id":"1811.09021","kind":"arxiv","version":1}},"canonical_sha256":"ef29d8d57eb04b191a6834c922b44e28451f51a25c7317059bd9f292feb33b87","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ef29d8d57eb04b191a6834c922b44e28451f51a25c7317059bd9f292feb33b87","first_computed_at":"2026-05-17T23:59:57.386828Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:59:57.386828Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hNeEMknY/WZddLtu1mNyerVTas3ZHK3MtQ6vm+BxKeEJz2fXeUvGM6dTve+RR+jydxx2hTFCxqQ9x0EnTEqABA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:59:57.387375Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.09021","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:392dd76dcaab294b67d4bed662804d05ca88246113ac433dc95c6a7eb5fd9032","sha256:b395393ebb8d6e859aa3997dc5323f57dbd50f0db246d6bfe3763db680df72ee"],"state_sha256":"76bfca5b9c890bca668e566a5f288ff77c7cede12f4611f0bf613ec86706d6a0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C9ki8UXnkQYXkwcpiTcOqkgsqrBaPcqmpjai3pdfg0rcWJ/SUL5wRKt2j21zsPPhneTaOMfbGYcz/Q01LAObAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T06:10:03.754460Z","bundle_sha256":"b736944455a2c342ab3db3845b5e8994c8f6f37a6dd365df7ea6fc6a58b74dbb"}}