{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:I6QXE6FTM2MEJSCCZCCHAPXGE5","short_pith_number":"pith:I6QXE6FT","schema_version":"1.0","canonical_sha256":"47a17278b3669844c842c884703ee6277c43b2a328208c09bfddbf4902f3b1e5","source":{"kind":"arxiv","id":"1810.12730","version":2},"attestation_state":"computed","paper":{"title":"Audiovisual speaker conversion: jointly and simultaneously transforming facial expression and acoustic characteristics","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Fuming Fang, Isao Echizen, Junichi Yamagishi, Xin Wang","submitted_at":"2018-10-29T15:20:32Z","abstract_excerpt":"An audiovisual speaker conversion method is presented for simultaneously transforming the facial expressions and voice of a source speaker into those of a target speaker. Transforming the facial and acoustic features together makes it possible for the converted voice and facial expressions to be highly correlated and for the generated target speaker to appear and sound natural. It uses three neural networks: a conversion network that fuses and transforms the facial and acoustic features, a waveform generation network that produces the waveform from both the converted facial and acoustic featur"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.12730","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2018-10-29T15:20:32Z","cross_cats_sorted":["cs.CL","cs.LG","cs.SD","stat.ML"],"title_canon_sha256":"964424b6bf13f2726c716a309ec667bdaa5170835726fc88b87f6d30eb430584","abstract_canon_sha256":"5ca202f873ff6de8bc287511a49bc0291a7e1584e9ad6345009561e2052d1cc1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:22.142572Z","signature_b64":"5mM8kL7OkSxxlRHVkIAGt9tg/jL+OWhNjvyUvKTniaT7g2oFpioJCZfiiW0fl3p0kV3hkuxW86Y8+g2PQQwJAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"47a17278b3669844c842c884703ee6277c43b2a328208c09bfddbf4902f3b1e5","last_reissued_at":"2026-05-17T23:59:22.142134Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:22.142134Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Audiovisual speaker conversion: jointly and simultaneously transforming facial expression and acoustic characteristics","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG","cs.SD","stat.ML"],"primary_cat":"eess.AS","authors_text":"Fuming Fang, Isao Echizen, Junichi Yamagishi, Xin Wang","submitted_at":"2018-10-29T15:20:32Z","abstract_excerpt":"An audiovisual speaker conversion method is presented for simultaneously transforming the facial expressions and voice of a source speaker into those of a target speaker. Transforming the facial and acoustic features together makes it possible for the converted voice and facial expressions to be highly correlated and for the generated target speaker to appear and sound natural. It uses three neural networks: a conversion network that fuses and transforms the facial and acoustic features, a waveform generation network that produces the waveform from both the converted facial and acoustic featur"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.12730","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.12730","created_at":"2026-05-17T23:59:22.142209+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.12730v2","created_at":"2026-05-17T23:59:22.142209+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.12730","created_at":"2026-05-17T23:59:22.142209+00:00"},{"alias_kind":"pith_short_12","alias_value":"I6QXE6FTM2ME","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"I6QXE6FTM2MEJSCC","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"I6QXE6FT","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5","json":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5.json","graph_json":"https://pith.science/api/pith-number/I6QXE6FTM2MEJSCCZCCHAPXGE5/graph.json","events_json":"https://pith.science/api/pith-number/I6QXE6FTM2MEJSCCZCCHAPXGE5/events.json","paper":"https://pith.science/paper/I6QXE6FT"},"agent_actions":{"view_html":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5","download_json":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5.json","view_paper":"https://pith.science/paper/I6QXE6FT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.12730&json=true","fetch_graph":"https://pith.science/api/pith-number/I6QXE6FTM2MEJSCCZCCHAPXGE5/graph.json","fetch_events":"https://pith.science/api/pith-number/I6QXE6FTM2MEJSCCZCCHAPXGE5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5/action/storage_attestation","attest_author":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5/action/author_attestation","sign_citation":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5/action/citation_signature","submit_replication":"https://pith.science/pith/I6QXE6FTM2MEJSCCZCCHAPXGE5/action/replication_record"}},"created_at":"2026-05-17T23:59:22.142209+00:00","updated_at":"2026-05-17T23:59:22.142209+00:00"}