{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:JHOOJR6W4MC2LG325XR44KYNT2","short_pith_number":"pith:JHOOJR6W","schema_version":"1.0","canonical_sha256":"49dce4c7d6e305a59b7aede3ce2b0d9ea870561093a604a334967bf4c0db72be","source":{"kind":"arxiv","id":"2602.05027","version":2},"attestation_state":"computed","paper":{"title":"AudioSAE: Towards Understanding of Audio-Processing Models with Sparse AutoEncoders","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SD","authors_text":"Alexey Rukhovich, Assel Yermekova, Georgii Aparin, Irina Piontkovskaya, Kristian Kuznetsov, Laida Kushnareva, Tasnima Sadekova, Vadim Popov","submitted_at":"2026-02-04T20:29:16Z","abstract_excerpt":"Sparse Autoencoders (SAEs) are powerful tools for interpreting neural representations, yet their use in audio remains underexplored. We train SAEs across all encoder layers of Whisper and HuBERT, provide an extensive evaluation of their stability, interpretability, and show their practical utility. Over 50% of the features remain consistent across random seeds, and reconstruction quality is preserved. SAE features capture general acoustic and semantic information as well as specific events, including environmental noises and paralinguistic sounds (e.g. laughter, whispering) and disentangle the"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.05027","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-02-04T20:29:16Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f111f08af7aa942b65dd7f5f6682d471129c03ea74451022926c25c34ce8c398","abstract_canon_sha256":"78709ee9d7388af780b1da9ad068f9a29e12c40e635a15b5ef4c5f3e0a99e061"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:48.559630Z","signature_b64":"bllANgSDRrPdMRWZRghNbmqCs30klBnTNq+oCFcnh2++dzJqExsnnf7fRw1OHPmKi+LamgT1EbhLAcG2ZadiCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"49dce4c7d6e305a59b7aede3ce2b0d9ea870561093a604a334967bf4c0db72be","last_reissued_at":"2026-06-03T01:05:48.559114Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:48.559114Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"AudioSAE: Towards Understanding of Audio-Processing Models with Sparse AutoEncoders","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SD","authors_text":"Alexey Rukhovich, Assel Yermekova, Georgii Aparin, Irina Piontkovskaya, Kristian Kuznetsov, Laida Kushnareva, Tasnima Sadekova, Vadim Popov","submitted_at":"2026-02-04T20:29:16Z","abstract_excerpt":"Sparse Autoencoders (SAEs) are powerful tools for interpreting neural representations, yet their use in audio remains underexplored. We train SAEs across all encoder layers of Whisper and HuBERT, provide an extensive evaluation of their stability, interpretability, and show their practical utility. Over 50% of the features remain consistent across random seeds, and reconstruction quality is preserved. SAE features capture general acoustic and semantic information as well as specific events, including environmental noises and paralinguistic sounds (e.g. laughter, whispering) and disentangle the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.05027","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.05027/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.05027","created_at":"2026-06-03T01:05:48.559175+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.05027v2","created_at":"2026-06-03T01:05:48.559175+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.05027","created_at":"2026-06-03T01:05:48.559175+00:00"},{"alias_kind":"pith_short_12","alias_value":"JHOOJR6W4MC2","created_at":"2026-06-03T01:05:48.559175+00:00"},{"alias_kind":"pith_short_16","alias_value":"JHOOJR6W4MC2LG32","created_at":"2026-06-03T01:05:48.559175+00:00"},{"alias_kind":"pith_short_8","alias_value":"JHOOJR6W","created_at":"2026-06-03T01:05:48.559175+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2","json":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2.json","graph_json":"https://pith.science/api/pith-number/JHOOJR6W4MC2LG325XR44KYNT2/graph.json","events_json":"https://pith.science/api/pith-number/JHOOJR6W4MC2LG325XR44KYNT2/events.json","paper":"https://pith.science/paper/JHOOJR6W"},"agent_actions":{"view_html":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2","download_json":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2.json","view_paper":"https://pith.science/paper/JHOOJR6W","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.05027&json=true","fetch_graph":"https://pith.science/api/pith-number/JHOOJR6W4MC2LG325XR44KYNT2/graph.json","fetch_events":"https://pith.science/api/pith-number/JHOOJR6W4MC2LG325XR44KYNT2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2/action/storage_attestation","attest_author":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2/action/author_attestation","sign_citation":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2/action/citation_signature","submit_replication":"https://pith.science/pith/JHOOJR6W4MC2LG325XR44KYNT2/action/replication_record"}},"created_at":"2026-06-03T01:05:48.559175+00:00","updated_at":"2026-06-03T01:05:48.559175+00:00"}