{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OACCB3QQKXGKCQHP4TBDB3E3JP","short_pith_number":"pith:OACCB3QQ","schema_version":"1.0","canonical_sha256":"700420ee1055cca140efe4c230ec9b4bca83cf1c923db889a3af20eccd2f60ee","source":{"kind":"arxiv","id":"2606.05889","version":1},"attestation_state":"computed","paper":{"title":"GLASS: GRPO-Trained LoRA for Acoustic Style Steering in Zero-Shot Text-to-Speech","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.CL","eess.AS"],"primary_cat":"cs.SD","authors_text":"Jaehoon Kang, Kyuhong Shim, Yejin Lee","submitted_at":"2026-06-04T08:58:57Z","abstract_excerpt":"We propose GLASS, a framework for composable acoustic style control in zero-shot autoregressive text-to-speech (TTS) that learns controls from post-generation rewards rather than style labels. In zero-shot TTS, a speaker prompt often entangles speaker identity with prosodic attributes such as speaking rate and pitch, making it difficult to change style without changing the prompt itself. GLASS instead treats each acoustic attribute as a reward-defined control direction. For each control axis, GLASS freezes the TTS backbone and trains one lightweight LoRA adapter with Group Relative Policy Opti"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.05889","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-04T08:58:57Z","cross_cats_sorted":["cs.CL","eess.AS"],"title_canon_sha256":"1f4c31c5396e65578851c4d9328f7324db7446b4f8a90b47d88cf375d90592b5","abstract_canon_sha256":"3bcf0cdb91b8f3bf469347dbb0ffd6d37b49c489db05064193d6bf66a91901c4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:06.894210Z","signature_b64":"WUT5aOAPL44GA0I+/xWFy2bCp+f7Qc4df4iAreEEM3RmfjFxVWewYX+oScMfsLmwIW2NwGAk7JS7+ZqCIFW5BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"700420ee1055cca140efe4c230ec9b4bca83cf1c923db889a3af20eccd2f60ee","last_reissued_at":"2026-06-05T01:15:06.893849Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:06.893849Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GLASS: GRPO-Trained LoRA for Acoustic Style Steering in Zero-Shot Text-to-Speech","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.CL","eess.AS"],"primary_cat":"cs.SD","authors_text":"Jaehoon Kang, Kyuhong Shim, Yejin Lee","submitted_at":"2026-06-04T08:58:57Z","abstract_excerpt":"We propose GLASS, a framework for composable acoustic style control in zero-shot autoregressive text-to-speech (TTS) that learns controls from post-generation rewards rather than style labels. In zero-shot TTS, a speaker prompt often entangles speaker identity with prosodic attributes such as speaking rate and pitch, making it difficult to change style without changing the prompt itself. GLASS instead treats each acoustic attribute as a reward-defined control direction. For each control axis, GLASS freezes the TTS backbone and trains one lightweight LoRA adapter with Group Relative Policy Opti"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.05889","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.05889/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.05889","created_at":"2026-06-05T01:15:06.893913+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.05889v1","created_at":"2026-06-05T01:15:06.893913+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.05889","created_at":"2026-06-05T01:15:06.893913+00:00"},{"alias_kind":"pith_short_12","alias_value":"OACCB3QQKXGK","created_at":"2026-06-05T01:15:06.893913+00:00"},{"alias_kind":"pith_short_16","alias_value":"OACCB3QQKXGKCQHP","created_at":"2026-06-05T01:15:06.893913+00:00"},{"alias_kind":"pith_short_8","alias_value":"OACCB3QQ","created_at":"2026-06-05T01:15:06.893913+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP","json":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP.json","graph_json":"https://pith.science/api/pith-number/OACCB3QQKXGKCQHP4TBDB3E3JP/graph.json","events_json":"https://pith.science/api/pith-number/OACCB3QQKXGKCQHP4TBDB3E3JP/events.json","paper":"https://pith.science/paper/OACCB3QQ"},"agent_actions":{"view_html":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP","download_json":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP.json","view_paper":"https://pith.science/paper/OACCB3QQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.05889&json=true","fetch_graph":"https://pith.science/api/pith-number/OACCB3QQKXGKCQHP4TBDB3E3JP/graph.json","fetch_events":"https://pith.science/api/pith-number/OACCB3QQKXGKCQHP4TBDB3E3JP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP/action/storage_attestation","attest_author":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP/action/author_attestation","sign_citation":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP/action/citation_signature","submit_replication":"https://pith.science/pith/OACCB3QQKXGKCQHP4TBDB3E3JP/action/replication_record"}},"created_at":"2026-06-05T01:15:06.893913+00:00","updated_at":"2026-06-05T01:15:06.893913+00:00"}