{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:UJP54NM7OCDMBRDKQX45EQWHJK","short_pith_number":"pith:UJP54NM7","canonical_record":{"source":{"id":"2605.30993","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-29T08:27:57Z","cross_cats_sorted":[],"title_canon_sha256":"60875ed4174ada81ed52c011f327a1b91663204a1adf960c21dcec4f0d01c2c3","abstract_canon_sha256":"9bb34fc3dce63e7ac9d12e631f3c2db37154c1161ecbf624987237b9e09dfce5"},"schema_version":"1.0"},"canonical_sha256":"a25fde359f7086c0c46a85f9d242c74aa1caf92682b0d1511b70a55775b5bde3","source":{"kind":"arxiv","id":"2605.30993","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30993","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30993v1","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30993","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"pith_short_12","alias_value":"UJP54NM7OCDM","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"pith_short_16","alias_value":"UJP54NM7OCDMBRDK","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"pith_short_8","alias_value":"UJP54NM7","created_at":"2026-06-01T01:03:29Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:UJP54NM7OCDMBRDKQX45EQWHJK","target":"record","payload":{"canonical_record":{"source":{"id":"2605.30993","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-29T08:27:57Z","cross_cats_sorted":[],"title_canon_sha256":"60875ed4174ada81ed52c011f327a1b91663204a1adf960c21dcec4f0d01c2c3","abstract_canon_sha256":"9bb34fc3dce63e7ac9d12e631f3c2db37154c1161ecbf624987237b9e09dfce5"},"schema_version":"1.0"},"canonical_sha256":"a25fde359f7086c0c46a85f9d242c74aa1caf92682b0d1511b70a55775b5bde3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:03:29.286845Z","signature_b64":"KStQ+5MXLb2OGgUy/De8+BaNjeBCgcEOCEcKXGdMOwmjeIORKJLS3l23Cs1szYylwCUj2swyyC8JhTJltaD0DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a25fde359f7086c0c46a85f9d242c74aa1caf92682b0d1511b70a55775b5bde3","last_reissued_at":"2026-06-01T01:03:29.285991Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:03:29.285991Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.30993","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:03:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JejSDqsGCMqDKMPVhenP4dd6lORmjkxOt2EVUILcGjA9FmP8Raoe1FvzVhiNYcpFebp3ByOf2Us5OBrTN49PDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T21:27:18.889778Z"},"content_sha256":"b2ad7366606917725e7267717789a73d5ff8ccac174678ac88668d80287da3c1","schema_version":"1.0","event_id":"sha256:b2ad7366606917725e7267717789a73d5ff8ccac174678ac88668d80287da3c1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:UJP54NM7OCDMBRDKQX45EQWHJK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SwanVoice: Expressive Long-Form Zero-Shot Speech Synthesis for Both Monologue and Dialogue","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Changhao Pan, Cheng Yang, Ke Lei, Ruiqi Li, Xiang Yin, Yu Zhang","submitted_at":"2026-05-29T08:27:57Z","abstract_excerpt":"Zero-shot text-to-speech (TTS) has improved substantially for single-speaker synthesis, yet expressive long-form multi-speaker dialogue remains difficult. A common workaround is to synthesize each turn with a monologue TTS model and stitch the outputs together. This adds inference cost and often breaks acoustic consistency, conversational coherence, and affective continuity across turns. Recent dialogue TTS systems have begun to address this setting, but they still struggle to keep expressive coherence, controllable speaker switching, and monologue quality at the same time. We present SwanData"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30993","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30993/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:03:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kiobGba33uxVqaQCsuiPfjAhJrILiCb5e+hGNQfXKfNpfZcD9LVLTKjMJKTA9Z8J7+rtOIE4hMii4xzUo6ibBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T21:27:18.890485Z"},"content_sha256":"e56f09dc468148ba9f7ffc9393b645f027f67e48b38bcabb1232efe4889b887d","schema_version":"1.0","event_id":"sha256:e56f09dc468148ba9f7ffc9393b645f027f67e48b38bcabb1232efe4889b887d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UJP54NM7OCDMBRDKQX45EQWHJK/bundle.json","state_url":"https://pith.science/pith/UJP54NM7OCDMBRDKQX45EQWHJK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UJP54NM7OCDMBRDKQX45EQWHJK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T21:27:18Z","links":{"resolver":"https://pith.science/pith/UJP54NM7OCDMBRDKQX45EQWHJK","bundle":"https://pith.science/pith/UJP54NM7OCDMBRDKQX45EQWHJK/bundle.json","state":"https://pith.science/pith/UJP54NM7OCDMBRDKQX45EQWHJK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UJP54NM7OCDMBRDKQX45EQWHJK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:UJP54NM7OCDMBRDKQX45EQWHJK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9bb34fc3dce63e7ac9d12e631f3c2db37154c1161ecbf624987237b9e09dfce5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-29T08:27:57Z","title_canon_sha256":"60875ed4174ada81ed52c011f327a1b91663204a1adf960c21dcec4f0d01c2c3"},"schema_version":"1.0","source":{"id":"2605.30993","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30993","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30993v1","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30993","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"pith_short_12","alias_value":"UJP54NM7OCDM","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"pith_short_16","alias_value":"UJP54NM7OCDMBRDK","created_at":"2026-06-01T01:03:29Z"},{"alias_kind":"pith_short_8","alias_value":"UJP54NM7","created_at":"2026-06-01T01:03:29Z"}],"graph_snapshots":[{"event_id":"sha256:e56f09dc468148ba9f7ffc9393b645f027f67e48b38bcabb1232efe4889b887d","target":"graph","created_at":"2026-06-01T01:03:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30993/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Zero-shot text-to-speech (TTS) has improved substantially for single-speaker synthesis, yet expressive long-form multi-speaker dialogue remains difficult. A common workaround is to synthesize each turn with a monologue TTS model and stitch the outputs together. This adds inference cost and often breaks acoustic consistency, conversational coherence, and affective continuity across turns. Recent dialogue TTS systems have begun to address this setting, but they still struggle to keep expressive coherence, controllable speaker switching, and monologue quality at the same time. We present SwanData","authors_text":"Changhao Pan, Cheng Yang, Ke Lei, Ruiqi Li, Xiang Yin, Yu Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-29T08:27:57Z","title":"SwanVoice: Expressive Long-Form Zero-Shot Speech Synthesis for Both Monologue and Dialogue"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30993","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b2ad7366606917725e7267717789a73d5ff8ccac174678ac88668d80287da3c1","target":"record","created_at":"2026-06-01T01:03:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9bb34fc3dce63e7ac9d12e631f3c2db37154c1161ecbf624987237b9e09dfce5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-29T08:27:57Z","title_canon_sha256":"60875ed4174ada81ed52c011f327a1b91663204a1adf960c21dcec4f0d01c2c3"},"schema_version":"1.0","source":{"id":"2605.30993","kind":"arxiv","version":1}},"canonical_sha256":"a25fde359f7086c0c46a85f9d242c74aa1caf92682b0d1511b70a55775b5bde3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a25fde359f7086c0c46a85f9d242c74aa1caf92682b0d1511b70a55775b5bde3","first_computed_at":"2026-06-01T01:03:29.285991Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:29.285991Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KStQ+5MXLb2OGgUy/De8+BaNjeBCgcEOCEcKXGdMOwmjeIORKJLS3l23Cs1szYylwCUj2swyyC8JhTJltaD0DQ==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:29.286845Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30993","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b2ad7366606917725e7267717789a73d5ff8ccac174678ac88668d80287da3c1","sha256:e56f09dc468148ba9f7ffc9393b645f027f67e48b38bcabb1232efe4889b887d"],"state_sha256":"90e3e91a12cf0fb449c3b438a995582629ba2f12d9650356bc30577483f7864d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OoVVAiFJJ+q8BrH9ku+Kp0+Ue3yDKQMmMzr6aP5dVkmS9TLVXF5Qvg4skwIck13xMYL1A5w3DnOjs1D80sdxBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T21:27:18.894186Z","bundle_sha256":"49e9eaccd1602faee417660fd75fa6bcd479b3a5b6c6ca744c096ae14f03cbf7"}}