{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:HRO3Z5WK26H3I4NEZRRXH2VFBO","short_pith_number":"pith:HRO3Z5WK","canonical_record":{"source":{"id":"1903.12392","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2019-03-29T08:36:06Z","cross_cats_sorted":["cs.CL","cs.SD","stat.ML"],"title_canon_sha256":"25151220cf94af3008546fb96e677632d196c302896b1f8005dc1adc45bf15b0","abstract_canon_sha256":"6557f8f0b576f84678ad6e96ed150a2d4159736500376d7c12c48307f8b03969"},"schema_version":"1.0"},"canonical_sha256":"3c5dbcf6cad78fb471a4cc6373eaa50bacef25fd637f6c28010b24483e8173ec","source":{"kind":"arxiv","id":"1903.12392","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.12392","created_at":"2026-05-17T23:49:08Z"},{"alias_kind":"arxiv_version","alias_value":"1903.12392v2","created_at":"2026-05-17T23:49:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.12392","created_at":"2026-05-17T23:49:08Z"},{"alias_kind":"pith_short_12","alias_value":"HRO3Z5WK26H3","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"HRO3Z5WK26H3I4NE","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"HRO3Z5WK","created_at":"2026-05-18T12:33:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:HRO3Z5WK26H3I4NEZRRXH2VFBO","target":"record","payload":{"canonical_record":{"source":{"id":"1903.12392","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2019-03-29T08:36:06Z","cross_cats_sorted":["cs.CL","cs.SD","stat.ML"],"title_canon_sha256":"25151220cf94af3008546fb96e677632d196c302896b1f8005dc1adc45bf15b0","abstract_canon_sha256":"6557f8f0b576f84678ad6e96ed150a2d4159736500376d7c12c48307f8b03969"},"schema_version":"1.0"},"canonical_sha256":"3c5dbcf6cad78fb471a4cc6373eaa50bacef25fd637f6c28010b24483e8173ec","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:08.631429Z","signature_b64":"GkDA40TaVHF9Ijmx3/CdDX0ytnyo6a/lLY8gShB7s31ey1x3G760k9wHGr446OBaDOh70zwQC+ioxjoWnO8YAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3c5dbcf6cad78fb471a4cc6373eaa50bacef25fd637f6c28010b24483e8173ec","last_reissued_at":"2026-05-17T23:49:08.630813Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:08.630813Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.12392","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MClOvyBRyNMOXVNrfqdYv0JPskURCCPwb36FWEWJCLaFU55mMm4cOn5kRb5D+1CguSZb+eR/4czmAzPP//h+Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T09:04:27.696523Z"},"content_sha256":"e71b2c25ab4884f437f161653eb5131099518f416cbc3ea317dcb0bae15b019f","schema_version":"1.0","event_id":"sha256:e71b2c25ab4884f437f161653eb5131099518f416cbc3ea317dcb0bae15b019f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:HRO3Z5WK26H3I4NEZRRXH2VFBO","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Training a Neural Speech Waveform Model using Spectral Losses of Short-Time Fourier Transform and Continuous Wavelet Transform","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Hirokazu Kameoka, Junichi Yamagishi, Shinji Takaki","submitted_at":"2019-03-29T08:36:06Z","abstract_excerpt":"Recently, we proposed short-time Fourier transform (STFT)-based loss functions for training a neural speech waveform model. In this paper, we generalize the above framework and propose a training scheme for such models based on spectral amplitude and phase losses obtained by either STFT or continuous wavelet transform (CWT), or both of them. Since CWT is capable of having time and frequency resolutions different from those of STFT and is cable of considering those closer to human auditory scales, the proposed loss functions could provide complementary information on speech signals. Experimenta"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.12392","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ERJmfvYb2dvJLBiHEdOeVINJypYn0WZTgLKZE5fveb3UGkiIA0txp+UzgO62oywg8dnZPZNiO6YvOEvWVcoHCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T09:04:27.697288Z"},"content_sha256":"eef1fbb8db595dc55b33131f77cad1b8ca6ae47932d3aca01e92ba24d8e2fd8a","schema_version":"1.0","event_id":"sha256:eef1fbb8db595dc55b33131f77cad1b8ca6ae47932d3aca01e92ba24d8e2fd8a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO/bundle.json","state_url":"https://pith.science/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T09:04:27Z","links":{"resolver":"https://pith.science/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO","bundle":"https://pith.science/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO/bundle.json","state":"https://pith.science/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HRO3Z5WK26H3I4NEZRRXH2VFBO/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:HRO3Z5WK26H3I4NEZRRXH2VFBO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6557f8f0b576f84678ad6e96ed150a2d4159736500376d7c12c48307f8b03969","cross_cats_sorted":["cs.CL","cs.SD","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2019-03-29T08:36:06Z","title_canon_sha256":"25151220cf94af3008546fb96e677632d196c302896b1f8005dc1adc45bf15b0"},"schema_version":"1.0","source":{"id":"1903.12392","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.12392","created_at":"2026-05-17T23:49:08Z"},{"alias_kind":"arxiv_version","alias_value":"1903.12392v2","created_at":"2026-05-17T23:49:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.12392","created_at":"2026-05-17T23:49:08Z"},{"alias_kind":"pith_short_12","alias_value":"HRO3Z5WK26H3","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"HRO3Z5WK26H3I4NE","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"HRO3Z5WK","created_at":"2026-05-18T12:33:18Z"}],"graph_snapshots":[{"event_id":"sha256:eef1fbb8db595dc55b33131f77cad1b8ca6ae47932d3aca01e92ba24d8e2fd8a","target":"graph","created_at":"2026-05-17T23:49:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recently, we proposed short-time Fourier transform (STFT)-based loss functions for training a neural speech waveform model. In this paper, we generalize the above framework and propose a training scheme for such models based on spectral amplitude and phase losses obtained by either STFT or continuous wavelet transform (CWT), or both of them. Since CWT is capable of having time and frequency resolutions different from those of STFT and is cable of considering those closer to human auditory scales, the proposed loss functions could provide complementary information on speech signals. Experimenta","authors_text":"Hirokazu Kameoka, Junichi Yamagishi, Shinji Takaki","cross_cats":["cs.CL","cs.SD","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2019-03-29T08:36:06Z","title":"Training a Neural Speech Waveform Model using Spectral Losses of Short-Time Fourier Transform and Continuous Wavelet Transform"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.12392","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e71b2c25ab4884f437f161653eb5131099518f416cbc3ea317dcb0bae15b019f","target":"record","created_at":"2026-05-17T23:49:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6557f8f0b576f84678ad6e96ed150a2d4159736500376d7c12c48307f8b03969","cross_cats_sorted":["cs.CL","cs.SD","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2019-03-29T08:36:06Z","title_canon_sha256":"25151220cf94af3008546fb96e677632d196c302896b1f8005dc1adc45bf15b0"},"schema_version":"1.0","source":{"id":"1903.12392","kind":"arxiv","version":2}},"canonical_sha256":"3c5dbcf6cad78fb471a4cc6373eaa50bacef25fd637f6c28010b24483e8173ec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3c5dbcf6cad78fb471a4cc6373eaa50bacef25fd637f6c28010b24483e8173ec","first_computed_at":"2026-05-17T23:49:08.630813Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:49:08.630813Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"GkDA40TaVHF9Ijmx3/CdDX0ytnyo6a/lLY8gShB7s31ey1x3G760k9wHGr446OBaDOh70zwQC+ioxjoWnO8YAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:49:08.631429Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.12392","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e71b2c25ab4884f437f161653eb5131099518f416cbc3ea317dcb0bae15b019f","sha256:eef1fbb8db595dc55b33131f77cad1b8ca6ae47932d3aca01e92ba24d8e2fd8a"],"state_sha256":"aec37b16f209c2e66f165411baff7b878c9c7257b57cc37b5b573fc975c35a0c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"h6IdtQ7nEFC6f+LSJ/hbRVLat1gqOQ2/Yp6UaZ7azloM5eSwGDPo7dbzjeMj6OmeiZ5TNlPOFWshcr1DBzaOAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T09:04:27.701269Z","bundle_sha256":"867481ee2376cdc7a6455015b19c55e5ee8c41aea7969348be6032e8a38e5a94"}}