{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:NDYZD6C2PIOF3R3BFWUNAIUPNT","short_pith_number":"pith:NDYZD6C2","schema_version":"1.0","canonical_sha256":"68f191f85a7a1c5dc7612da8d0228f6cfad7bd16d308ca60cd315d8c6174ae7f","source":{"kind":"arxiv","id":"1906.07307","version":1},"attestation_state":"computed","paper":{"title":"Towards Transfer Learning for End-to-End Speech Synthesis from Deep Pre-Trained Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"James Glass, Wei Fang, Yu-An Chung","submitted_at":"2019-06-17T23:48:05Z","abstract_excerpt":"Modern text-to-speech (TTS) systems are able to generate audio that sounds almost as natural as human speech. However, the bar of developing high-quality TTS systems remains high since a sizable set of studio-quality <text, audio> pairs is usually required. Compared to commercial data used to develop state-of-the-art systems, publicly available data are usually worse in terms of both quality and size. Audio generated by TTS systems trained on publicly available data tends to not only sound less natural, but also exhibits more background noise. In this work, we aim to lower TTS systems' relianc"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.07307","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-17T23:48:05Z","cross_cats_sorted":["cs.SD","eess.AS"],"title_canon_sha256":"2f7bfb717a7278393020b2a91c6d12ae2cf7a2e3cdebecfd0329dad7ae87345d","abstract_canon_sha256":"c30a486a629c5fa206a851f85e80a269b4cd11e470033485b845160dd9586bed"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:08.247613Z","signature_b64":"AE+tA65xlvKv/xG3nq2COWL+MDQj1S8z4bWG3ATb3XxG5QzJoOjoxm/nbEBbdxSCa9gUlDXpHxJpi4MW0eR2CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"68f191f85a7a1c5dc7612da8d0228f6cfad7bd16d308ca60cd315d8c6174ae7f","last_reissued_at":"2026-05-17T23:43:08.246992Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:08.246992Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Transfer Learning for End-to-End Speech Synthesis from Deep Pre-Trained Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"James Glass, Wei Fang, Yu-An Chung","submitted_at":"2019-06-17T23:48:05Z","abstract_excerpt":"Modern text-to-speech (TTS) systems are able to generate audio that sounds almost as natural as human speech. However, the bar of developing high-quality TTS systems remains high since a sizable set of studio-quality <text, audio> pairs is usually required. Compared to commercial data used to develop state-of-the-art systems, publicly available data are usually worse in terms of both quality and size. Audio generated by TTS systems trained on publicly available data tends to not only sound less natural, but also exhibits more background noise. In this work, we aim to lower TTS systems' relianc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.07307","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.07307","created_at":"2026-05-17T23:43:08.247081+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.07307v1","created_at":"2026-05-17T23:43:08.247081+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.07307","created_at":"2026-05-17T23:43:08.247081+00:00"},{"alias_kind":"pith_short_12","alias_value":"NDYZD6C2PIOF","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_16","alias_value":"NDYZD6C2PIOF3R3B","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_8","alias_value":"NDYZD6C2","created_at":"2026-05-18T12:33:24.271573+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT","json":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT.json","graph_json":"https://pith.science/api/pith-number/NDYZD6C2PIOF3R3BFWUNAIUPNT/graph.json","events_json":"https://pith.science/api/pith-number/NDYZD6C2PIOF3R3BFWUNAIUPNT/events.json","paper":"https://pith.science/paper/NDYZD6C2"},"agent_actions":{"view_html":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT","download_json":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT.json","view_paper":"https://pith.science/paper/NDYZD6C2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.07307&json=true","fetch_graph":"https://pith.science/api/pith-number/NDYZD6C2PIOF3R3BFWUNAIUPNT/graph.json","fetch_events":"https://pith.science/api/pith-number/NDYZD6C2PIOF3R3BFWUNAIUPNT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT/action/storage_attestation","attest_author":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT/action/author_attestation","sign_citation":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT/action/citation_signature","submit_replication":"https://pith.science/pith/NDYZD6C2PIOF3R3BFWUNAIUPNT/action/replication_record"}},"created_at":"2026-05-17T23:43:08.247081+00:00","updated_at":"2026-05-17T23:43:08.247081+00:00"}