{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:I7YW5V6RAAPIAVW6UT7HP6L3CK","short_pith_number":"pith:I7YW5V6R","schema_version":"1.0","canonical_sha256":"47f16ed7d1001e8056dea4fe77f97b128f37582c4c99193d9e7612b97cc6d1ba","source":{"kind":"arxiv","id":"1906.08977","version":1},"attestation_state":"computed","paper":{"title":"Singing Voice Synthesis Using Deep Autoregressive Neural Networks for Acoustic Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","eess.AS"],"primary_cat":"cs.SD","authors_text":"Li-Rong Dai, Yang Ai, Yuan-Hao Yi, Zhen-Hua Ling","submitted_at":"2019-06-21T06:40:06Z","abstract_excerpt":"This paper presents a method of using autoregressive neural networks for the acoustic modeling of singing voice synthesis (SVS). Singing voice differs from speech and it contains more local dynamic movements of acoustic features, e.g., vibratos. Therefore, our method adopts deep autoregressive (DAR) models to predict the F0 and spectral features of singing voice in order to better describe the dependencies among the acoustic features of consecutive frames. For F0 modeling, discretized F0 values are used and the influences of the history length in DAR are analyzed by experiments. An F0 post-pro"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.08977","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-06-21T06:40:06Z","cross_cats_sorted":["cs.LG","eess.AS"],"title_canon_sha256":"8a90721f474d3fac50125321a1a04e3c51da54d4018732589873cf5cd6c5272c","abstract_canon_sha256":"00ff956190ddd97ada94d0f912b92758fd33bea21e53dcecea872b7b4a024956"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:47.169024Z","signature_b64":"UkqRmDCNAHtu/hebjBOaQu+NvP4KJREDyo9ge/iM9z5k/IhnaCs/728IQyNN3cVmCJ+2jDe5TvliV1WcbHhODg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"47f16ed7d1001e8056dea4fe77f97b128f37582c4c99193d9e7612b97cc6d1ba","last_reissued_at":"2026-05-17T23:42:47.168305Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:47.168305Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Singing Voice Synthesis Using Deep Autoregressive Neural Networks for Acoustic Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","eess.AS"],"primary_cat":"cs.SD","authors_text":"Li-Rong Dai, Yang Ai, Yuan-Hao Yi, Zhen-Hua Ling","submitted_at":"2019-06-21T06:40:06Z","abstract_excerpt":"This paper presents a method of using autoregressive neural networks for the acoustic modeling of singing voice synthesis (SVS). Singing voice differs from speech and it contains more local dynamic movements of acoustic features, e.g., vibratos. Therefore, our method adopts deep autoregressive (DAR) models to predict the F0 and spectral features of singing voice in order to better describe the dependencies among the acoustic features of consecutive frames. For F0 modeling, discretized F0 values are used and the influences of the history length in DAR are analyzed by experiments. An F0 post-pro"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.08977","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.08977","created_at":"2026-05-17T23:42:47.168423+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.08977v1","created_at":"2026-05-17T23:42:47.168423+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.08977","created_at":"2026-05-17T23:42:47.168423+00:00"},{"alias_kind":"pith_short_12","alias_value":"I7YW5V6RAAPI","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"I7YW5V6RAAPIAVW6","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"I7YW5V6R","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1906.08977","citing_title":"Singing Voice Synthesis Using Deep Autoregressive Neural Networks for Acoustic Modeling","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2406.02430","citing_title":"Seed-TTS: A Family of High-Quality Versatile Speech Generation Models","ref_index":22,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK","json":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK.json","graph_json":"https://pith.science/api/pith-number/I7YW5V6RAAPIAVW6UT7HP6L3CK/graph.json","events_json":"https://pith.science/api/pith-number/I7YW5V6RAAPIAVW6UT7HP6L3CK/events.json","paper":"https://pith.science/paper/I7YW5V6R"},"agent_actions":{"view_html":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK","download_json":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK.json","view_paper":"https://pith.science/paper/I7YW5V6R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.08977&json=true","fetch_graph":"https://pith.science/api/pith-number/I7YW5V6RAAPIAVW6UT7HP6L3CK/graph.json","fetch_events":"https://pith.science/api/pith-number/I7YW5V6RAAPIAVW6UT7HP6L3CK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK/action/storage_attestation","attest_author":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK/action/author_attestation","sign_citation":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK/action/citation_signature","submit_replication":"https://pith.science/pith/I7YW5V6RAAPIAVW6UT7HP6L3CK/action/replication_record"}},"created_at":"2026-05-17T23:42:47.168423+00:00","updated_at":"2026-05-17T23:42:47.168423+00:00"}