{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:I5C6SDWQXVBA3HNIVTFHYJK24Q","short_pith_number":"pith:I5C6SDWQ","schema_version":"1.0","canonical_sha256":"4745e90ed0bd420d9da8acca7c255ae4390faaad2eaf37ec5b78afa39b0799aa","source":{"kind":"arxiv","id":"2605.21332","version":1},"attestation_state":"computed","paper":{"title":"Speech Quality Embeddings for Improved Detection and Classification of Degradations in Speech Signals","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Michael Kuhlmann, Reinhold Haeb-Umbach, Tobias Cord-Landwehr","submitted_at":"2026-05-20T15:59:10Z","abstract_excerpt":"Automatic subjective speech quality assessment (SSQA) traditionally estimates speech quality on an utterance or system level. While this resolution was adequate for older transmission or synthesis systems that produced speech signals of mediocre quality, modern systems generate high-quality speech with degradations that may occur only locally. With suitable model architectures and regularization losses, SSQA models trained with utterance-level targets can also yield useful local predictions of speech quality. In this work, we extend such models to produce frame-level embeddings that cluster by"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.21332","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-20T15:59:10Z","cross_cats_sorted":[],"title_canon_sha256":"a495a5df343902e10757e44be57a74342dbb99e9b7958cfdbb2706d26f1c3736","abstract_canon_sha256":"cec0af6db1d339a210acebe3ca5f5fefe78a26b0b1a8022adfb88ce12bee1404"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T02:05:29.577763Z","signature_b64":"JRk/nZa8K6kB8vxjzJFRs1be3fG3w1XVW0100Cy3Ao1hQEg2roOS0hVlkybO1gT6xPGI45yp8YCy5LS2QQRWDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4745e90ed0bd420d9da8acca7c255ae4390faaad2eaf37ec5b78afa39b0799aa","last_reissued_at":"2026-05-21T02:05:29.576979Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T02:05:29.576979Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Speech Quality Embeddings for Improved Detection and Classification of Degradations in Speech Signals","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Michael Kuhlmann, Reinhold Haeb-Umbach, Tobias Cord-Landwehr","submitted_at":"2026-05-20T15:59:10Z","abstract_excerpt":"Automatic subjective speech quality assessment (SSQA) traditionally estimates speech quality on an utterance or system level. While this resolution was adequate for older transmission or synthesis systems that produced speech signals of mediocre quality, modern systems generate high-quality speech with degradations that may occur only locally. With suitable model architectures and regularization losses, SSQA models trained with utterance-level targets can also yield useful local predictions of speech quality. In this work, we extend such models to produce frame-level embeddings that cluster by"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21332","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.21332/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.21332","created_at":"2026-05-21T02:05:29.577108+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.21332v1","created_at":"2026-05-21T02:05:29.577108+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21332","created_at":"2026-05-21T02:05:29.577108+00:00"},{"alias_kind":"pith_short_12","alias_value":"I5C6SDWQXVBA","created_at":"2026-05-21T02:05:29.577108+00:00"},{"alias_kind":"pith_short_16","alias_value":"I5C6SDWQXVBA3HNI","created_at":"2026-05-21T02:05:29.577108+00:00"},{"alias_kind":"pith_short_8","alias_value":"I5C6SDWQ","created_at":"2026-05-21T02:05:29.577108+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.21332","citing_title":"Speech Quality Embeddings for Improved Detection and Classification of Degradations in Speech Signals","ref_index":1,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q","json":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q.json","graph_json":"https://pith.science/api/pith-number/I5C6SDWQXVBA3HNIVTFHYJK24Q/graph.json","events_json":"https://pith.science/api/pith-number/I5C6SDWQXVBA3HNIVTFHYJK24Q/events.json","paper":"https://pith.science/paper/I5C6SDWQ"},"agent_actions":{"view_html":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q","download_json":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q.json","view_paper":"https://pith.science/paper/I5C6SDWQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.21332&json=true","fetch_graph":"https://pith.science/api/pith-number/I5C6SDWQXVBA3HNIVTFHYJK24Q/graph.json","fetch_events":"https://pith.science/api/pith-number/I5C6SDWQXVBA3HNIVTFHYJK24Q/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q/action/timestamp_anchor","attest_storage":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q/action/storage_attestation","attest_author":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q/action/author_attestation","sign_citation":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q/action/citation_signature","submit_replication":"https://pith.science/pith/I5C6SDWQXVBA3HNIVTFHYJK24Q/action/replication_record"}},"created_at":"2026-05-21T02:05:29.577108+00:00","updated_at":"2026-05-21T02:05:29.577108+00:00"}