{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:5XODL4VY4QFSPI3D4Z2JET7OTT","short_pith_number":"pith:5XODL4VY","schema_version":"1.0","canonical_sha256":"eddc35f2b8e40b27a363e674924fee9cea4b72d9bac657c73697fee8b60c66ef","source":{"kind":"arxiv","id":"2606.05868","version":1},"attestation_state":"computed","paper":{"title":"YouZhi: Towards High-Concurrency Financial LLMs via Adaptive GQA-to-MLA Transition","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bin Wang, Chang Liu, Chenghan Jiang, Dong Guo, Duo Zhang, Hang Wang, Huawei LLM Team: Ruihan Long, Hu Zhao, Jiabin Li, Jiahui Zhang, Jianwei Li, Jia Yuan, Jie Ran, Jinbin Fu, Jing Hu, Jingjing Ding, Jing Zhu, Junjie Wu, Junkui Ju, Jun Xie, Kaichao Liang, Lei Cao, Linkai Hou, Man Luo, Mingxuan Yuan, Ning Huang, Pei Li, PSBC LLM Team, Rui Yuan, Rui Zhao, Sen Wang, Shixiong Kai, Shuai Zong, Shucheng Lin, Shupei Sun, Tianan Zhang, Tian Jin, Wei Sun, Wei Yu, Wenjing Xu, Wenshuang Yang, Xiaozhe Ren, Xin Jiang, Xin Wang, Xinyu Wang, Xinzhuang Niu, Xiuhong Fei, Yang Zhao, Yaozong Wu, Yibo He, Yuhang Zhang, Yulong Li, Zequn Ding, Zhangcheng Lv, Zhaohui Xu, Zhentao Tang, Zhihao Song, Zhipeng Zhang","submitted_at":"2026-06-04T08:44:37Z","abstract_excerpt":"Large language models (LLMs) drive significant financial innovations, yet their high-concurrency deployment is severely bottlenecked by KV cache memory overhead, which inflates infrastructure costs and throttles scalability. To address this, we propose YouZhi-LLM, a highly efficient financial LLM empowered by a comprehensive structural transition and training pipeline natively built on the Huawei Ascend ecosystem. At its algorithmic core, YouZhi-LLM features a layer-adaptive GQA-to-MLA transition framework that dynamically assigns per-layer FreqFold sizes, maximizing KV-cache compression while"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.05868","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-04T08:44:37Z","cross_cats_sorted":[],"title_canon_sha256":"d492c231257a036a82861995c3f917eb1157ddb5683ae232b66b606fb7fc0f3c","abstract_canon_sha256":"b6eb23685ba4c77b691ed2629802810d39789c6e93d4f3b82b38b942f18663a2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:06.352069Z","signature_b64":"3oCSJLQ6AEIlLYSGlnF3OlxbUpf5aT8veWV4QIVi0mX0CNF6z8pK+xhcLlZ/lflhdrajF2R90lJVCPbwd8OvCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eddc35f2b8e40b27a363e674924fee9cea4b72d9bac657c73697fee8b60c66ef","last_reissued_at":"2026-06-05T01:15:06.351642Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:06.351642Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"YouZhi: Towards High-Concurrency Financial LLMs via Adaptive GQA-to-MLA Transition","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bin Wang, Chang Liu, Chenghan Jiang, Dong Guo, Duo Zhang, Hang Wang, Huawei LLM Team: Ruihan Long, Hu Zhao, Jiabin Li, Jiahui Zhang, Jianwei Li, Jia Yuan, Jie Ran, Jinbin Fu, Jing Hu, Jingjing Ding, Jing Zhu, Junjie Wu, Junkui Ju, Jun Xie, Kaichao Liang, Lei Cao, Linkai Hou, Man Luo, Mingxuan Yuan, Ning Huang, Pei Li, PSBC LLM Team, Rui Yuan, Rui Zhao, Sen Wang, Shixiong Kai, Shuai Zong, Shucheng Lin, Shupei Sun, Tianan Zhang, Tian Jin, Wei Sun, Wei Yu, Wenjing Xu, Wenshuang Yang, Xiaozhe Ren, Xin Jiang, Xin Wang, Xinyu Wang, Xinzhuang Niu, Xiuhong Fei, Yang Zhao, Yaozong Wu, Yibo He, Yuhang Zhang, Yulong Li, Zequn Ding, Zhangcheng Lv, Zhaohui Xu, Zhentao Tang, Zhihao Song, Zhipeng Zhang","submitted_at":"2026-06-04T08:44:37Z","abstract_excerpt":"Large language models (LLMs) drive significant financial innovations, yet their high-concurrency deployment is severely bottlenecked by KV cache memory overhead, which inflates infrastructure costs and throttles scalability. To address this, we propose YouZhi-LLM, a highly efficient financial LLM empowered by a comprehensive structural transition and training pipeline natively built on the Huawei Ascend ecosystem. At its algorithmic core, YouZhi-LLM features a layer-adaptive GQA-to-MLA transition framework that dynamically assigns per-layer FreqFold sizes, maximizing KV-cache compression while"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.05868","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.05868/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.05868","created_at":"2026-06-05T01:15:06.351708+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.05868v1","created_at":"2026-06-05T01:15:06.351708+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.05868","created_at":"2026-06-05T01:15:06.351708+00:00"},{"alias_kind":"pith_short_12","alias_value":"5XODL4VY4QFS","created_at":"2026-06-05T01:15:06.351708+00:00"},{"alias_kind":"pith_short_16","alias_value":"5XODL4VY4QFSPI3D","created_at":"2026-06-05T01:15:06.351708+00:00"},{"alias_kind":"pith_short_8","alias_value":"5XODL4VY","created_at":"2026-06-05T01:15:06.351708+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT","json":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT.json","graph_json":"https://pith.science/api/pith-number/5XODL4VY4QFSPI3D4Z2JET7OTT/graph.json","events_json":"https://pith.science/api/pith-number/5XODL4VY4QFSPI3D4Z2JET7OTT/events.json","paper":"https://pith.science/paper/5XODL4VY"},"agent_actions":{"view_html":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT","download_json":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT.json","view_paper":"https://pith.science/paper/5XODL4VY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.05868&json=true","fetch_graph":"https://pith.science/api/pith-number/5XODL4VY4QFSPI3D4Z2JET7OTT/graph.json","fetch_events":"https://pith.science/api/pith-number/5XODL4VY4QFSPI3D4Z2JET7OTT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT/action/storage_attestation","attest_author":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT/action/author_attestation","sign_citation":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT/action/citation_signature","submit_replication":"https://pith.science/pith/5XODL4VY4QFSPI3D4Z2JET7OTT/action/replication_record"}},"created_at":"2026-06-05T01:15:06.351708+00:00","updated_at":"2026-06-05T01:15:06.351708+00:00"}