{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2021:GVTB2DHFLMJZZFGOXWNDODAFTY","short_pith_number":"pith:GVTB2DHF","schema_version":"1.0","canonical_sha256":"35661d0ce55b139c94cebd9a370c059e087ca2dec264e7d00b09abd62d7ffea3","source":{"kind":"arxiv","id":"2104.09494","version":1},"attestation_state":"computed","paper":{"title":"NISQA: A Deep CNN-Self-Attention Model for Multidimensional Speech Quality Prediction with Crowdsourced Datasets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.SD"],"primary_cat":"eess.AS","authors_text":"Assmaa Chehadi, Babak Naderi, Gabriel Mittag, Sebastian M\\\"oller","submitted_at":"2021-04-19T17:56:59Z","abstract_excerpt":"In this paper, we present an update to the NISQA speech quality prediction model that is focused on distortions that occur in communication networks. In contrast to the previous version, the model is trained end-to-end and the time-dependency modelling and time-pooling is achieved through a Self-Attention mechanism. Besides overall speech quality, the model also predicts the four speech quality dimensions Noisiness, Coloration, Discontinuity, and Loudness, and in this way gives more insight into the cause of a quality degradation. Furthermore, new datasets with over 13,000 speech files were cr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2104.09494","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2021-04-19T17:56:59Z","cross_cats_sorted":["cs.AI","cs.LG","cs.SD"],"title_canon_sha256":"f9bcb6b377b5105656ca89ec5aa34d16b95e29ee72d3bf65fb997d83a69f1d52","abstract_canon_sha256":"823ab06dd1b2fe86638d814403fba8b4004f3502f6ba8ae77e72673717fda44e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T03:40:33.066211Z","signature_b64":"e+4yYMpwAn/+Q1KsNc39g4B7JM7hO75sxOhAEaCZuibW14TgySg59Nuyi+0sQ8xGqrvGMsjSk6JD8yvCVA3YAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"35661d0ce55b139c94cebd9a370c059e087ca2dec264e7d00b09abd62d7ffea3","last_reissued_at":"2026-07-05T03:40:33.065729Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T03:40:33.065729Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"NISQA: A Deep CNN-Self-Attention Model for Multidimensional Speech Quality Prediction with Crowdsourced Datasets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.SD"],"primary_cat":"eess.AS","authors_text":"Assmaa Chehadi, Babak Naderi, Gabriel Mittag, Sebastian M\\\"oller","submitted_at":"2021-04-19T17:56:59Z","abstract_excerpt":"In this paper, we present an update to the NISQA speech quality prediction model that is focused on distortions that occur in communication networks. In contrast to the previous version, the model is trained end-to-end and the time-dependency modelling and time-pooling is achieved through a Self-Attention mechanism. Besides overall speech quality, the model also predicts the four speech quality dimensions Noisiness, Coloration, Discontinuity, and Loudness, and in this way gives more insight into the cause of a quality degradation. Furthermore, new datasets with over 13,000 speech files were cr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2104.09494","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2104.09494/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2104.09494","created_at":"2026-07-05T03:40:33.065794+00:00"},{"alias_kind":"arxiv_version","alias_value":"2104.09494v1","created_at":"2026-07-05T03:40:33.065794+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2104.09494","created_at":"2026-07-05T03:40:33.065794+00:00"},{"alias_kind":"pith_short_12","alias_value":"GVTB2DHFLMJZ","created_at":"2026-07-05T03:40:33.065794+00:00"},{"alias_kind":"pith_short_16","alias_value":"GVTB2DHFLMJZZFGO","created_at":"2026-07-05T03:40:33.065794+00:00"},{"alias_kind":"pith_short_8","alias_value":"GVTB2DHF","created_at":"2026-07-05T03:40:33.065794+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2606.13006","citing_title":"Emo-LiPO: Listwise Preference Optimization for Fine-Grained Emotion Intensity Control in LLM-based Text-to-Speech","ref_index":18,"is_internal_anchor":false},{"citing_arxiv_id":"2606.11828","citing_title":"Feature-Aligned Speech Watermarking for Robustness to Reconstruction Distortions","ref_index":20,"is_internal_anchor":false},{"citing_arxiv_id":"2606.05678","citing_title":"Beyond Waveform Robustness: Robust Feature-Vocoder Adversarial Attacks on Automatic Speech Recognition","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"2605.16681","citing_title":"A Survey of Advancing Audio Super-Resolution and Bandwidth Extension from Discriminative to Generative Models","ref_index":44,"is_internal_anchor":false},{"citing_arxiv_id":"2512.09299","citing_title":"VABench: A Comprehensive Benchmark for Audio-Video Generation","ref_index":32,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04505","citing_title":"JASTIN: Aligning LLMs for Zero-Shot Audio and Speech Evaluation via Natural Language Instructions","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2604.09371","citing_title":"Discrete Token Modeling for Multi-Stem Music Source Separation with Language Models","ref_index":35,"is_internal_anchor":false},{"citing_arxiv_id":"2605.00861","citing_title":"Voice Mapping of Text-to-Speech Systems: A Metric-Based Approach for Voice Quality Assessment","ref_index":13,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY","json":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY.json","graph_json":"https://pith.science/api/pith-number/GVTB2DHFLMJZZFGOXWNDODAFTY/graph.json","events_json":"https://pith.science/api/pith-number/GVTB2DHFLMJZZFGOXWNDODAFTY/events.json","paper":"https://pith.science/paper/GVTB2DHF"},"agent_actions":{"view_html":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY","download_json":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY.json","view_paper":"https://pith.science/paper/GVTB2DHF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2104.09494&json=true","fetch_graph":"https://pith.science/api/pith-number/GVTB2DHFLMJZZFGOXWNDODAFTY/graph.json","fetch_events":"https://pith.science/api/pith-number/GVTB2DHFLMJZZFGOXWNDODAFTY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY/action/storage_attestation","attest_author":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY/action/author_attestation","sign_citation":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY/action/citation_signature","submit_replication":"https://pith.science/pith/GVTB2DHFLMJZZFGOXWNDODAFTY/action/replication_record"}},"created_at":"2026-07-05T03:40:33.065794+00:00","updated_at":"2026-07-05T03:40:33.065794+00:00"}