{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:Z7QLAXPZNEOC6HXYDPQ6MFIUMY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"49333ace483f564e023ef1d6138125147e77396eb457ea09bcd626547a90e4d7","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T04:47:22Z","title_canon_sha256":"f4863bbe0621727b76fdaf7b4479f8558cd16a2da763d0125b128359c6c033ea"},"schema_version":"1.0","source":{"id":"2605.14366","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14366","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14366v1","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14366","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"pith_short_12","alias_value":"Z7QLAXPZNEOC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Z7QLAXPZNEOC6HXY","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Z7QLAXPZ","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:f7fd4d8f588f635d8285c502f1d52fb0e686750526160adf0f2c9ac99e720121","target":"graph","created_at":"2026-05-17T23:39:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments show that our method acquires low-resource capabilities while markedly mitigating alignment tax, preserving general competence more effectively than SFT."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That embedding-level semantic rewards reliably capture and preserve intended meaning across languages without introducing new biases or requiring the model to have strong pretrained semantic understanding in the target language."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Reinforcement learning with semantic rewards lets LLMs gain low-resource language skills without the alignment tax that degrades general capabilities in supervised fine-tuning."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning with embedding-level semantic rewards lets LLMs add low-resource languages without the usual loss of general skills."}],"snapshot_sha256":"5eb63a024a0766558abbc7473f17968c01ede3936cc6cc8a9ceb6dcdae7b5ca8"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"24e7b85c474bd0a15e68827203481b908cedb3c90af88a1c4575d926fcfc9849"},"paper":{"abstract_excerpt":"Extending large language models (LLMs) to low-resource languages often incurs an \"alignment tax\": improvements in the target language come at the cost of catastrophic forgetting in general capabilities. We argue that this trade-off arises from the rigidity of supervised fine-tuning (SFT), which enforces token-level surface imitation on narrow and biased data distributions. To address this limitation, we propose a semantic-space alignment paradigm powered by Group Relative Policy Optimization (GRPO), where the model is optimized using embedding-level semantic rewards rather than likelihood maxi","authors_text":"Guixian Xu, Longfei Zheng, Rong Fu, Wentao Zhang, Xiaolu Zhang, Xuexian Song, Zeli Su, Zhankai Xu, Zhou Liu, Ziyin Zhang","cross_cats":["cs.LG"],"headline":"Reinforcement learning with embedding-level semantic rewards lets LLMs add low-resource languages without the usual loss of general skills.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T04:47:22Z","title":"Reinforcement Learning with Semantic Rewards Enables Low-Resource Language Expansion without Alignment Tax"},"references":{"count":38,"internal_anchors":10,"resolved_work":38,"sample":[{"cited_arxiv_id":"","doi":"10.1162/tacl_a_00343","is_internal_anchor":false,"ref_index":1,"title":"Transactions of the Association for Computational Linguistics , volume =","work_id":"ac8e5e49-0c81-4708-a831-537e1c6797bb","year":2020},{"cited_arxiv_id":"","doi":"10.18653/v1/2020.coling-main.574","is_internal_anchor":false,"ref_index":3,"title":"Proceedings of the 28th International Conference on Computational Linguistics , year =","work_id":"68bee7c2-72ce-4164-bf37-371e1b63bada","year":2020},{"cited_arxiv_id":"","doi":"10.18653/v1/2020.coling-main.381","is_internal_anchor":false,"ref_index":4,"title":"Proceedings of the 28th International Conference on Computational Linguistics , year =","work_id":"cd404891-c6fb-4234-94e5-fcda67cb5aa8","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Proceedings of the National Academy of Sciences , volume =","work_id":"7d346159-1571-4ab3-8bda-afb6b1c99afe","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"Proceedings of the European Conference on Computer Vision (ECCV) , year =","work_id":"eea8d58b-3b2a-46f6-9c61-070a4414f5d0","year":null}],"snapshot_sha256":"8c26adc7dd01caca208f7f0cf65486be46d7dd0fa7179bfab2222cb57f1644fb"},"source":{"id":"2605.14366","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T02:37:54.778015Z","id":"45384cd8-39e2-4bbc-8359-2e5f3f40bf2d","model_set":{"reader":"grok-4.3"},"one_line_summary":"Reinforcement learning with semantic rewards lets LLMs gain low-resource language skills without the alignment tax that degrades general capabilities in supervised fine-tuning.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning with embedding-level semantic rewards lets LLMs add low-resource languages without the usual loss of general skills.","strongest_claim":"Experiments show that our method acquires low-resource capabilities while markedly mitigating alignment tax, preserving general competence more effectively than SFT.","weakest_assumption":"That embedding-level semantic rewards reliably capture and preserve intended meaning across languages without introducing new biases or requiring the model to have strong pretrained semantic understanding in the target language."}},"verdict_id":"45384cd8-39e2-4bbc-8359-2e5f3f40bf2d"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:80ba97933cf42c1e61ab8e967d7ddf7c0799cb00dddce7c024d359bd6eac85e9","target":"record","created_at":"2026-05-17T23:39:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"49333ace483f564e023ef1d6138125147e77396eb457ea09bcd626547a90e4d7","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T04:47:22Z","title_canon_sha256":"f4863bbe0621727b76fdaf7b4479f8558cd16a2da763d0125b128359c6c033ea"},"schema_version":"1.0","source":{"id":"2605.14366","kind":"arxiv","version":1}},"canonical_sha256":"cfe0b05df9691c2f1ef81be1e615146610fa8b61949ed71f6c188e9c19d90c27","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cfe0b05df9691c2f1ef81be1e615146610fa8b61949ed71f6c188e9c19d90c27","first_computed_at":"2026-05-17T23:39:07.886359Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:07.886359Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"r7YM5vvxqpCqPbMR1T4jorgakJF0kbzr8ktFGr44ZuCNKivmPVPTCjiAEtJe1BbboiPsNmNoz6JQcyxeMelNAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:07.886965Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14366","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:80ba97933cf42c1e61ab8e967d7ddf7c0799cb00dddce7c024d359bd6eac85e9","sha256:f7fd4d8f588f635d8285c502f1d52fb0e686750526160adf0f2c9ac99e720121"],"state_sha256":"3176b728815dc48a5973f5985bf7705adb9658cc3aecb674e21567dbdf16d189"}