{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:SPK2H62UIA36AGKX2J6UKFEKWA","short_pith_number":"pith:SPK2H62U","canonical_record":{"source":{"id":"1810.07217","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-16T18:20:02Z","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"title_canon_sha256":"feafb640b47bdfa90991dc18c1d2ea5e955fb0c5a3e1921f78ad093c2f4f2484","abstract_canon_sha256":"c2b9df581183d1095eb01630dd0a139fd4863804d336d80c2c24508a1a2678a5"},"schema_version":"1.0"},"canonical_sha256":"93d5a3fb544037e01957d27d45148ab034f9e8f79020f2a96cb6d82ede1884eb","source":{"kind":"arxiv","id":"1810.07217","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.07217","created_at":"2026-05-17T23:57:21Z"},{"alias_kind":"arxiv_version","alias_value":"1810.07217v2","created_at":"2026-05-17T23:57:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.07217","created_at":"2026-05-17T23:57:21Z"},{"alias_kind":"pith_short_12","alias_value":"SPK2H62UIA36","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"SPK2H62UIA36AGKX","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"SPK2H62U","created_at":"2026-05-18T12:32:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:SPK2H62UIA36AGKX2J6UKFEKWA","target":"record","payload":{"canonical_record":{"source":{"id":"1810.07217","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-16T18:20:02Z","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"title_canon_sha256":"feafb640b47bdfa90991dc18c1d2ea5e955fb0c5a3e1921f78ad093c2f4f2484","abstract_canon_sha256":"c2b9df581183d1095eb01630dd0a139fd4863804d336d80c2c24508a1a2678a5"},"schema_version":"1.0"},"canonical_sha256":"93d5a3fb544037e01957d27d45148ab034f9e8f79020f2a96cb6d82ede1884eb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:21.310634Z","signature_b64":"XiNDUlX61kViV9l7rcriv96AiyrtKYZR4VRdpEjvOJo53qTe2+Kob0rsng5vfJbQtm4kKy8fG6fBsMZFxokTDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"93d5a3fb544037e01957d27d45148ab034f9e8f79020f2a96cb6d82ede1884eb","last_reissued_at":"2026-05-17T23:57:21.309986Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:21.309986Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1810.07217","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:57:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CNq5LQ8JEbIeinzO5YG9YG2QFIIQ7vsGtKV8O+x4q472/SpUKZvpxieq2NY3JCZnLEbjOlrBTc35u5sk5kvyBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T21:44:03.676405Z"},"content_sha256":"6f0bb35f7426d56c9e9112a1f2e863056ab27d39ca26e538847ac3226629d8e7","schema_version":"1.0","event_id":"sha256:6f0bb35f7426d56c9e9112a1f2e863056ab27d39ca26e538847ac3226629d8e7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:SPK2H62UIA36AGKX2J6UKFEKWA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Hierarchical Generative Modeling for Controllable Speech Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"Heiga Zen, Jonathan Shen, Patrick Nguyen, Ron J. Weiss, Ruoming Pang, Wei-Ning Hsu, Ye Jia, Yonghui Wu, Yuan Cao, Yuxuan Wang, Yu Zhang, Zhifeng Chen","submitted_at":"2018-10-16T18:20:02Z","abstract_excerpt":"This paper proposes a neural sequence-to-sequence text-to-speech (TTS) model which can control latent attributes in the generated speech that are rarely annotated in the training data, such as speaking style, accent, background noise, and recording conditions. The model is formulated as a conditional generative model based on the variational autoencoder (VAE) framework, with two levels of hierarchical latent variables. The first level is a categorical variable, which represents attribute groups (e.g. clean/noisy) and provides interpretability. The second level, conditioned on the first, is a m"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.07217","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:57:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"O2H764mgBZ8ZASPcqs+HPy8O0v6TC/kMJn4Sk5k/Jk7sAtbPly0ZsTMn6Lr2KKAQQ3SshNdMxtIfDTDSn9GdDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T21:44:03.677096Z"},"content_sha256":"c1976b1176a4107f1f6d1c24198c6f7df3915f2439f0aeb9b90ffcdd21708110","schema_version":"1.0","event_id":"sha256:c1976b1176a4107f1f6d1c24198c6f7df3915f2439f0aeb9b90ffcdd21708110"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SPK2H62UIA36AGKX2J6UKFEKWA/bundle.json","state_url":"https://pith.science/pith/SPK2H62UIA36AGKX2J6UKFEKWA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SPK2H62UIA36AGKX2J6UKFEKWA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T21:44:03Z","links":{"resolver":"https://pith.science/pith/SPK2H62UIA36AGKX2J6UKFEKWA","bundle":"https://pith.science/pith/SPK2H62UIA36AGKX2J6UKFEKWA/bundle.json","state":"https://pith.science/pith/SPK2H62UIA36AGKX2J6UKFEKWA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SPK2H62UIA36AGKX2J6UKFEKWA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:SPK2H62UIA36AGKX2J6UKFEKWA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c2b9df581183d1095eb01630dd0a139fd4863804d336d80c2c24508a1a2678a5","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-16T18:20:02Z","title_canon_sha256":"feafb640b47bdfa90991dc18c1d2ea5e955fb0c5a3e1921f78ad093c2f4f2484"},"schema_version":"1.0","source":{"id":"1810.07217","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.07217","created_at":"2026-05-17T23:57:21Z"},{"alias_kind":"arxiv_version","alias_value":"1810.07217v2","created_at":"2026-05-17T23:57:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.07217","created_at":"2026-05-17T23:57:21Z"},{"alias_kind":"pith_short_12","alias_value":"SPK2H62UIA36","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"SPK2H62UIA36AGKX","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"SPK2H62U","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:c1976b1176a4107f1f6d1c24198c6f7df3915f2439f0aeb9b90ffcdd21708110","target":"graph","created_at":"2026-05-17T23:57:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper proposes a neural sequence-to-sequence text-to-speech (TTS) model which can control latent attributes in the generated speech that are rarely annotated in the training data, such as speaking style, accent, background noise, and recording conditions. The model is formulated as a conditional generative model based on the variational autoencoder (VAE) framework, with two levels of hierarchical latent variables. The first level is a categorical variable, which represents attribute groups (e.g. clean/noisy) and provides interpretability. The second level, conditioned on the first, is a m","authors_text":"Heiga Zen, Jonathan Shen, Patrick Nguyen, Ron J. Weiss, Ruoming Pang, Wei-Ning Hsu, Ye Jia, Yonghui Wu, Yuan Cao, Yuxuan Wang, Yu Zhang, Zhifeng Chen","cross_cats":["cs.LG","cs.SD","eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-16T18:20:02Z","title":"Hierarchical Generative Modeling for Controllable Speech Synthesis"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.07217","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6f0bb35f7426d56c9e9112a1f2e863056ab27d39ca26e538847ac3226629d8e7","target":"record","created_at":"2026-05-17T23:57:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c2b9df581183d1095eb01630dd0a139fd4863804d336d80c2c24508a1a2678a5","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-16T18:20:02Z","title_canon_sha256":"feafb640b47bdfa90991dc18c1d2ea5e955fb0c5a3e1921f78ad093c2f4f2484"},"schema_version":"1.0","source":{"id":"1810.07217","kind":"arxiv","version":2}},"canonical_sha256":"93d5a3fb544037e01957d27d45148ab034f9e8f79020f2a96cb6d82ede1884eb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"93d5a3fb544037e01957d27d45148ab034f9e8f79020f2a96cb6d82ede1884eb","first_computed_at":"2026-05-17T23:57:21.309986Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:57:21.309986Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"XiNDUlX61kViV9l7rcriv96AiyrtKYZR4VRdpEjvOJo53qTe2+Kob0rsng5vfJbQtm4kKy8fG6fBsMZFxokTDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:57:21.310634Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.07217","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6f0bb35f7426d56c9e9112a1f2e863056ab27d39ca26e538847ac3226629d8e7","sha256:c1976b1176a4107f1f6d1c24198c6f7df3915f2439f0aeb9b90ffcdd21708110"],"state_sha256":"d7b47ed502e99123cf60d51fcddc77ba8b2d4169a371cedc19302a30badcb282"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pTp8+MRQsGY7X4LeyDRjSX17gDhLebNtm3QKNRLY3GkK1OEnSbtpGoncg2iiBcy+m+8b3cKA4rUv94SsClQmBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T21:44:03.679997Z","bundle_sha256":"487d087e63741c23a874a7995b8e688dfb55bad7f206fab145ffc5d8f7232649"}}