{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:N4CKBXFLXWO65SMXRLQQRFQIHU","short_pith_number":"pith:N4CKBXFL","schema_version":"1.0","canonical_sha256":"6f04a0dcabbd9deec9978ae10896083d080e37f9ebf63e92b58c745d28d9b5bc","source":{"kind":"arxiv","id":"1807.11679","version":1},"attestation_state":"computed","paper":{"title":"Wasserstein GAN and Waveform Loss-based Acoustic Model Training for Multi-speaker Text-to-Speech Synthesis Systems Using a WaveNet Vocoder","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Daisuke Saito, Hieu-Thi Luong, Junichi Yamagishi, Nobuaki Minematsu, Shinji Takaki, Yi Zhao","submitted_at":"2018-07-31T06:38:54Z","abstract_excerpt":"Recent neural networks such as WaveNet and sampleRNN that learn directly from speech waveform samples have achieved very high-quality synthetic speech in terms of both naturalness and speaker similarity even in multi-speaker text-to-speech synthesis systems. Such neural networks are being used as an alternative to vocoders and hence they are often called neural vocoders. The neural vocoder uses acoustic features as local condition parameters, and these parameters need to be accurately predicted by another acoustic model. However, it is not yet clear how to train this acoustic model, which is p"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1807.11679","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-07-31T06:38:54Z","cross_cats_sorted":["cs.CL","cs.SD","stat.ML"],"title_canon_sha256":"93040039a380d1f91a15c22375a75154a26d679aece8df8fceba58addeb09e47","abstract_canon_sha256":"3a805b434a1d36441e96dff7ed8ef4616f67ab3e40bf2fb76241da96dc5e5a62"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:09:21.431928Z","signature_b64":"HM7519qaxUWSFGSlnepPANsCzw5kX4Abz5CCiYGjw0X4k4a1XzzhUjeGpoZJZQV3kPqYXZSzNn71tBtyLWPNAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6f04a0dcabbd9deec9978ae10896083d080e37f9ebf63e92b58c745d28d9b5bc","last_reissued_at":"2026-05-18T00:09:21.431439Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:09:21.431439Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Wasserstein GAN and Waveform Loss-based Acoustic Model Training for Multi-speaker Text-to-Speech Synthesis Systems Using a WaveNet Vocoder","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Daisuke Saito, Hieu-Thi Luong, Junichi Yamagishi, Nobuaki Minematsu, Shinji Takaki, Yi Zhao","submitted_at":"2018-07-31T06:38:54Z","abstract_excerpt":"Recent neural networks such as WaveNet and sampleRNN that learn directly from speech waveform samples have achieved very high-quality synthetic speech in terms of both naturalness and speaker similarity even in multi-speaker text-to-speech synthesis systems. Such neural networks are being used as an alternative to vocoders and hence they are often called neural vocoders. The neural vocoder uses acoustic features as local condition parameters, and these parameters need to be accurately predicted by another acoustic model. However, it is not yet clear how to train this acoustic model, which is p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.11679","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1807.11679","created_at":"2026-05-18T00:09:21.431516+00:00"},{"alias_kind":"arxiv_version","alias_value":"1807.11679v1","created_at":"2026-05-18T00:09:21.431516+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.11679","created_at":"2026-05-18T00:09:21.431516+00:00"},{"alias_kind":"pith_short_12","alias_value":"N4CKBXFLXWO6","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_16","alias_value":"N4CKBXFLXWO65SMX","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_8","alias_value":"N4CKBXFL","created_at":"2026-05-18T12:32:40.477152+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU","json":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU.json","graph_json":"https://pith.science/api/pith-number/N4CKBXFLXWO65SMXRLQQRFQIHU/graph.json","events_json":"https://pith.science/api/pith-number/N4CKBXFLXWO65SMXRLQQRFQIHU/events.json","paper":"https://pith.science/paper/N4CKBXFL"},"agent_actions":{"view_html":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU","download_json":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU.json","view_paper":"https://pith.science/paper/N4CKBXFL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1807.11679&json=true","fetch_graph":"https://pith.science/api/pith-number/N4CKBXFLXWO65SMXRLQQRFQIHU/graph.json","fetch_events":"https://pith.science/api/pith-number/N4CKBXFLXWO65SMXRLQQRFQIHU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU/action/storage_attestation","attest_author":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU/action/author_attestation","sign_citation":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU/action/citation_signature","submit_replication":"https://pith.science/pith/N4CKBXFLXWO65SMXRLQQRFQIHU/action/replication_record"}},"created_at":"2026-05-18T00:09:21.431516+00:00","updated_at":"2026-05-18T00:09:21.431516+00:00"}