{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:2XA2ZQC6EZM4EHS5ZOFOX7UJDH","short_pith_number":"pith:2XA2ZQC6","canonical_record":{"source":{"id":"1803.09047","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-24T02:52:58Z","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"title_canon_sha256":"ea56430525a30d4e11c49205eb106e2fcd3129731ba5519e708ab154319001ea","abstract_canon_sha256":"4653c9b397f646541af66fb6540cfcf19548d11b84c273fa5450174284f402f6"},"schema_version":"1.0"},"canonical_sha256":"d5c1acc05e2659c21e5dcb8aebfe8919d2021757851a191f55686c190ab6b533","source":{"kind":"arxiv","id":"1803.09047","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.09047","created_at":"2026-05-18T00:20:12Z"},{"alias_kind":"arxiv_version","alias_value":"1803.09047v1","created_at":"2026-05-18T00:20:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09047","created_at":"2026-05-18T00:20:12Z"},{"alias_kind":"pith_short_12","alias_value":"2XA2ZQC6EZM4","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"2XA2ZQC6EZM4EHS5","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"2XA2ZQC6","created_at":"2026-05-18T12:32:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:2XA2ZQC6EZM4EHS5ZOFOX7UJDH","target":"record","payload":{"canonical_record":{"source":{"id":"1803.09047","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-24T02:52:58Z","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"title_canon_sha256":"ea56430525a30d4e11c49205eb106e2fcd3129731ba5519e708ab154319001ea","abstract_canon_sha256":"4653c9b397f646541af66fb6540cfcf19548d11b84c273fa5450174284f402f6"},"schema_version":"1.0"},"canonical_sha256":"d5c1acc05e2659c21e5dcb8aebfe8919d2021757851a191f55686c190ab6b533","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:20:12.771639Z","signature_b64":"LGjKyL8mmZHq/1/omFL7VR8TGbK8/iw8OkGKAU46T0vX8ngJ1jk7KA1XxjO1xpQ92YcXqzUMo9Pct1imABLEBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d5c1acc05e2659c21e5dcb8aebfe8919d2021757851a191f55686c190ab6b533","last_reissued_at":"2026-05-18T00:20:12.771019Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:20:12.771019Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.09047","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"g7LirZqHvnINvYwjtUd13IIIU7GLWP36H6Sct/9eaNW++C8aUeVUxv93j/SOwE40VIgIZv8upiHpnsga/LDXAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T16:26:47.604460Z"},"content_sha256":"8f2c887afad3a15d1b2c449e06e3ccd38973cb6b63c6d2bf7d50d8ff482ea811","schema_version":"1.0","event_id":"sha256:8f2c887afad3a15d1b2c449e06e3ccd38973cb6b63c6d2bf7d50d8ff482ea811"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:2XA2ZQC6EZM4EHS5ZOFOX7UJDH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards End-to-End Prosody Transfer for Expressive Speech Synthesis with Tacotron","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"Daisy Stanton, Eric Battenberg, Joel Shor, Rif A. Saurous, RJ Skerry-Ryan, Rob Clark, Ron J. Weiss, Ying Xiao, Yuxuan Wang","submitted_at":"2018-03-24T02:52:58Z","abstract_excerpt":"We present an extension to the Tacotron speech synthesis architecture that learns a latent embedding space of prosody, derived from a reference acoustic representation containing the desired prosody. We show that conditioning Tacotron on this learned embedding space results in synthesized audio that matches the prosody of the reference signal with fine time detail even when the reference and synthesis speakers are different. Additionally, we show that a reference prosody embedding can be used to synthesize text that is different from that of the reference utterance. We define several quantitat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09047","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cnMZzonBv6iAFAg6STKobVLKc9cQWI1c9N1XdwMwVrfAXjPfDjP9sN1z3xpZSSFfLJ4FIBoPoIDEDb0fFsD4Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T16:26:47.604803Z"},"content_sha256":"0fc4da07ab189f1b2b194733680dbb20d7956cc1dd88445e95d82c7e61146a58","schema_version":"1.0","event_id":"sha256:0fc4da07ab189f1b2b194733680dbb20d7956cc1dd88445e95d82c7e61146a58"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH/bundle.json","state_url":"https://pith.science/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T16:26:47Z","links":{"resolver":"https://pith.science/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH","bundle":"https://pith.science/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH/bundle.json","state":"https://pith.science/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2XA2ZQC6EZM4EHS5ZOFOX7UJDH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:2XA2ZQC6EZM4EHS5ZOFOX7UJDH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4653c9b397f646541af66fb6540cfcf19548d11b84c273fa5450174284f402f6","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-24T02:52:58Z","title_canon_sha256":"ea56430525a30d4e11c49205eb106e2fcd3129731ba5519e708ab154319001ea"},"schema_version":"1.0","source":{"id":"1803.09047","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.09047","created_at":"2026-05-18T00:20:12Z"},{"alias_kind":"arxiv_version","alias_value":"1803.09047v1","created_at":"2026-05-18T00:20:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09047","created_at":"2026-05-18T00:20:12Z"},{"alias_kind":"pith_short_12","alias_value":"2XA2ZQC6EZM4","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"2XA2ZQC6EZM4EHS5","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"2XA2ZQC6","created_at":"2026-05-18T12:32:02Z"}],"graph_snapshots":[{"event_id":"sha256:0fc4da07ab189f1b2b194733680dbb20d7956cc1dd88445e95d82c7e61146a58","target":"graph","created_at":"2026-05-18T00:20:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present an extension to the Tacotron speech synthesis architecture that learns a latent embedding space of prosody, derived from a reference acoustic representation containing the desired prosody. We show that conditioning Tacotron on this learned embedding space results in synthesized audio that matches the prosody of the reference signal with fine time detail even when the reference and synthesis speakers are different. Additionally, we show that a reference prosody embedding can be used to synthesize text that is different from that of the reference utterance. We define several quantitat","authors_text":"Daisy Stanton, Eric Battenberg, Joel Shor, Rif A. Saurous, RJ Skerry-Ryan, Rob Clark, Ron J. Weiss, Ying Xiao, Yuxuan Wang","cross_cats":["cs.LG","cs.SD","eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-24T02:52:58Z","title":"Towards End-to-End Prosody Transfer for Expressive Speech Synthesis with Tacotron"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09047","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8f2c887afad3a15d1b2c449e06e3ccd38973cb6b63c6d2bf7d50d8ff482ea811","target":"record","created_at":"2026-05-18T00:20:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4653c9b397f646541af66fb6540cfcf19548d11b84c273fa5450174284f402f6","cross_cats_sorted":["cs.LG","cs.SD","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-24T02:52:58Z","title_canon_sha256":"ea56430525a30d4e11c49205eb106e2fcd3129731ba5519e708ab154319001ea"},"schema_version":"1.0","source":{"id":"1803.09047","kind":"arxiv","version":1}},"canonical_sha256":"d5c1acc05e2659c21e5dcb8aebfe8919d2021757851a191f55686c190ab6b533","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d5c1acc05e2659c21e5dcb8aebfe8919d2021757851a191f55686c190ab6b533","first_computed_at":"2026-05-18T00:20:12.771019Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:20:12.771019Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LGjKyL8mmZHq/1/omFL7VR8TGbK8/iw8OkGKAU46T0vX8ngJ1jk7KA1XxjO1xpQ92YcXqzUMo9Pct1imABLEBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:20:12.771639Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.09047","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8f2c887afad3a15d1b2c449e06e3ccd38973cb6b63c6d2bf7d50d8ff482ea811","sha256:0fc4da07ab189f1b2b194733680dbb20d7956cc1dd88445e95d82c7e61146a58"],"state_sha256":"aaffa828d2f5848690454c4829ddc72259fe1ff4c74460ea12501cc215aea46a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/ZDWGc2LSXDAxpBQwUtUBjyjO+I+lcvvqMIZUkI78l0/C6i/zaB7kLUGxHLGNUbEDT9YJDDaLtnlTpAI6eoDAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T16:26:47.606767Z","bundle_sha256":"9a5fa941e1c4a3bfd74863baa63f433f6e42ad1c07378cada752e5e470891e3d"}}