{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:BW3IRTVAQEUHXQSNCU5FWHY2VG","short_pith_number":"pith:BW3IRTVA","schema_version":"1.0","canonical_sha256":"0db688cea081287bc24d153a5b1f1aa9a78374929a5c7501f9787a07e60f5eef","source":{"kind":"arxiv","id":"1810.11945","version":2},"attestation_state":"computed","paper":{"title":"STFT spectral loss for training a neural speech waveform model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Junichi Yamagishi, Shinji Takaki, Toru Nakashika, Xin Wang","submitted_at":"2018-10-29T04:05:35Z","abstract_excerpt":"This paper proposes a new loss using short-time Fourier transform (STFT) spectra for the aim of training a high-performance neural speech waveform model that predicts raw continuous speech waveform samples directly. Not only amplitude spectra but also phase spectra obtained from generated speech waveforms are used to calculate the proposed loss. We also mathematically show that training of the waveform model on the basis of the proposed loss can be interpreted as maximum likelihood training that assumes the amplitude and phase spectra of generated speech waveforms following Gaussian and von Mi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.11945","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2018-10-29T04:05:35Z","cross_cats_sorted":["cs.CL","cs.SD","stat.ML"],"title_canon_sha256":"5f11f5f3e270213e3702907f8ab6eaf82a8baf3465bb997802b0b4b89016f6d9","abstract_canon_sha256":"e02a51a1b86ff7303ce42c5551e4be036703f022ab43fc1195bda35c05c257e2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:57.029028Z","signature_b64":"p1I815O8aFDvcRzR2xxbXgCBHtvhsbb4QawfI4QmnGMHXL3R2lzch2LYo/SdUX2jtczI73G5mguKGqfJnDPJDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0db688cea081287bc24d153a5b1f1aa9a78374929a5c7501f9787a07e60f5eef","last_reissued_at":"2026-05-18T00:01:57.028478Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:57.028478Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"STFT spectral loss for training a neural speech waveform model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Junichi Yamagishi, Shinji Takaki, Toru Nakashika, Xin Wang","submitted_at":"2018-10-29T04:05:35Z","abstract_excerpt":"This paper proposes a new loss using short-time Fourier transform (STFT) spectra for the aim of training a high-performance neural speech waveform model that predicts raw continuous speech waveform samples directly. Not only amplitude spectra but also phase spectra obtained from generated speech waveforms are used to calculate the proposed loss. We also mathematically show that training of the waveform model on the basis of the proposed loss can be interpreted as maximum likelihood training that assumes the amplitude and phase spectra of generated speech waveforms following Gaussian and von Mi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.11945","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.11945","created_at":"2026-05-18T00:01:57.028578+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.11945v2","created_at":"2026-05-18T00:01:57.028578+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.11945","created_at":"2026-05-18T00:01:57.028578+00:00"},{"alias_kind":"pith_short_12","alias_value":"BW3IRTVAQEUH","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_16","alias_value":"BW3IRTVAQEUHXQSN","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_8","alias_value":"BW3IRTVA","created_at":"2026-05-18T12:32:16.446611+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG","json":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG.json","graph_json":"https://pith.science/api/pith-number/BW3IRTVAQEUHXQSNCU5FWHY2VG/graph.json","events_json":"https://pith.science/api/pith-number/BW3IRTVAQEUHXQSNCU5FWHY2VG/events.json","paper":"https://pith.science/paper/BW3IRTVA"},"agent_actions":{"view_html":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG","download_json":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG.json","view_paper":"https://pith.science/paper/BW3IRTVA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.11945&json=true","fetch_graph":"https://pith.science/api/pith-number/BW3IRTVAQEUHXQSNCU5FWHY2VG/graph.json","fetch_events":"https://pith.science/api/pith-number/BW3IRTVAQEUHXQSNCU5FWHY2VG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG/action/storage_attestation","attest_author":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG/action/author_attestation","sign_citation":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG/action/citation_signature","submit_replication":"https://pith.science/pith/BW3IRTVAQEUHXQSNCU5FWHY2VG/action/replication_record"}},"created_at":"2026-05-18T00:01:57.028578+00:00","updated_at":"2026-05-18T00:01:57.028578+00:00"}