{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:PG4VPBKMZVNSGAP2MWS7JCXCPF","short_pith_number":"pith:PG4VPBKM","schema_version":"1.0","canonical_sha256":"79b957854ccd5b2301fa65a5f48ae27960b9e3cbc05b67d1e948a11bc108acb2","source":{"kind":"arxiv","id":"2309.10305","version":4},"attestation_state":"computed","paper":{"title":"Baichuan 2: Open Large-scale Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Aiyuan Yang, Bingning Wang, Bin Xiao, Borong Zhang, Ce Bian, Chao Yin, Chenxu Lv, Da Pan, Dian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Liu, Feng Wang, Guangwei Ai, Guosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji, Jian Xie, Juntao Dai, Kun Fang, Lei Su, Liang Song, Lifeng Liu, Liyun Ru, Luyao Ma, Mang Wang, Mickel Liu, MingAn Lin, Nuolan Nie, Peidong Guo, Ruiyang Sun, Tao Zhang, Tianpeng Li, Tianyu Li, Wei Cheng, Weipeng Chen, Xiangrong Zeng, Xiaochuan Wang, Xiaoxi Chen, Xin Men, Xin Yu, Xuehai Pan, Yanjun Shen, Yiding Wang, Yiyu Li, Youxin Jiang, Yuchen Gao, Yupeng Zhang, Zenan Zhou, Zhiying Wu","submitted_at":"2023-09-19T04:13:22Z","abstract_excerpt":"Large language models (LLMs) have demonstrated remarkable performance on a variety of natural language tasks based on just a few examples of natural language instructions, reducing the need for extensive feature engineering. However, most powerful LLMs are closed-source or limited in their capability for languages other than English. In this technical report, we present Baichuan 2, a series of large-scale multilingual language models containing 7 billion and 13 billion parameters, trained from scratch, on 2.6 trillion tokens. Baichuan 2 matches or outperforms other open-source models of simila"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2309.10305","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-09-19T04:13:22Z","cross_cats_sorted":[],"title_canon_sha256":"100cf69c612a7392bba0090aac7611e541475fd23753e3f59d56916ac1968ac6","abstract_canon_sha256":"7f6eb79942e004f095196b6bfe797f3f94ca1d783dc724a53f36fc3282e320ba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:49.236369Z","signature_b64":"n14xrOWQ8JytW/P4VLBFDSB383LJgI1biyLw/ToupALGz/wuSIOyBwWL5qIPY+l1Oj3l2xoCpQfThZVGOv5+Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"79b957854ccd5b2301fa65a5f48ae27960b9e3cbc05b67d1e948a11bc108acb2","last_reissued_at":"2026-05-17T23:38:49.235540Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:49.235540Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Baichuan 2: Open Large-scale Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Aiyuan Yang, Bingning Wang, Bin Xiao, Borong Zhang, Ce Bian, Chao Yin, Chenxu Lv, Da Pan, Dian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Liu, Feng Wang, Guangwei Ai, Guosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji, Jian Xie, Juntao Dai, Kun Fang, Lei Su, Liang Song, Lifeng Liu, Liyun Ru, Luyao Ma, Mang Wang, Mickel Liu, MingAn Lin, Nuolan Nie, Peidong Guo, Ruiyang Sun, Tao Zhang, Tianpeng Li, Tianyu Li, Wei Cheng, Weipeng Chen, Xiangrong Zeng, Xiaochuan Wang, Xiaoxi Chen, Xin Men, Xin Yu, Xuehai Pan, Yanjun Shen, Yiding Wang, Yiyu Li, Youxin Jiang, Yuchen Gao, Yupeng Zhang, Zenan Zhou, Zhiying Wu","submitted_at":"2023-09-19T04:13:22Z","abstract_excerpt":"Large language models (LLMs) have demonstrated remarkable performance on a variety of natural language tasks based on just a few examples of natural language instructions, reducing the need for extensive feature engineering. However, most powerful LLMs are closed-source or limited in their capability for languages other than English. In this technical report, we present Baichuan 2, a series of large-scale multilingual language models containing 7 billion and 13 billion parameters, trained from scratch, on 2.6 trillion tokens. Baichuan 2 matches or outperforms other open-source models of simila"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2309.10305","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2309.10305","created_at":"2026-05-17T23:38:49.235719+00:00"},{"alias_kind":"arxiv_version","alias_value":"2309.10305v4","created_at":"2026-05-17T23:38:49.235719+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.10305","created_at":"2026-05-17T23:38:49.235719+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2401.05561","citing_title":"TrustLLM: Trustworthiness in Large Language Models","ref_index":182,"is_internal_anchor":true},{"citing_arxiv_id":"2408.07666","citing_title":"Model Merging in LLMs, MLLMs, and Beyond: Methods, Theories, Applications and Opportunities","ref_index":266,"is_internal_anchor":true},{"citing_arxiv_id":"2401.15947","citing_title":"MoE-LLaVA: Mixture of Experts for Large Vision-Language Models","ref_index":38,"is_internal_anchor":false},{"citing_arxiv_id":"2403.17297","citing_title":"InternLM2 Technical Report","ref_index":211,"is_internal_anchor":false},{"citing_arxiv_id":"2404.14294","citing_title":"A Survey on Efficient Inference for Large Language Models","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2312.14238","citing_title":"InternVL: Scaling up Vision Foundation Models and Aligning for Generic Visual-Linguistic Tasks","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2311.12793","citing_title":"ShareGPT4V: Improving Large Multi-Modal Models with Better Captions","ref_index":56,"is_internal_anchor":false},{"citing_arxiv_id":"2403.04652","citing_title":"Yi: Open Foundation Models by 01.AI","ref_index":86,"is_internal_anchor":false},{"citing_arxiv_id":"2404.16821","citing_title":"How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"2403.20330","citing_title":"Are We on the Right Way for Evaluating Large Vision-Language Models?","ref_index":47,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08898","citing_title":"LLM-Agnostic Semantic Representation Attack","ref_index":65,"is_internal_anchor":false},{"citing_arxiv_id":"2604.25614","citing_title":"HotComment: A Benchmark for Evaluating Popularity of Online Comments","ref_index":82,"is_internal_anchor":false},{"citing_arxiv_id":"2406.06525","citing_title":"Autoregressive Model Beats Diffusion: Llama for Scalable Image Generation","ref_index":38,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19015","citing_title":"FedProxy: Federated Fine-Tuning of LLMs via Proxy SLMs and Heterogeneity-Aware Fusion","ref_index":36,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08948","citing_title":"TaxPraBen: A Scalable Benchmark for Structured Evaluation of LLMs in Chinese Real-World Tax Practice","ref_index":50,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06765","citing_title":"VITA-QinYu: Expressive Spoken Language Model for Role-Playing and Singing","ref_index":133,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06737","citing_title":"WisdomInterrogatory (LuWen): An Open-Source Legal Large Language Model Technical Report","ref_index":4,"is_internal_anchor":false},{"citing_arxiv_id":"2303.18223","citing_title":"A Survey of Large Language Models","ref_index":102,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17037","citing_title":"Dynamic Emotion and Personality Profiling for Multimodal Deception Detection","ref_index":32,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02200","citing_title":"ARGUS: Policy-Adaptive Ad Governance via Evolving Reinforcement with Adversarial Umpiring","ref_index":51,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF","json":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF.json","graph_json":"https://pith.science/api/pith-number/PG4VPBKMZVNSGAP2MWS7JCXCPF/graph.json","events_json":"https://pith.science/api/pith-number/PG4VPBKMZVNSGAP2MWS7JCXCPF/events.json","paper":"https://pith.science/paper/PG4VPBKM"},"agent_actions":{"view_html":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF","download_json":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF.json","view_paper":"https://pith.science/paper/PG4VPBKM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2309.10305&json=true","fetch_graph":"https://pith.science/api/pith-number/PG4VPBKMZVNSGAP2MWS7JCXCPF/graph.json","fetch_events":"https://pith.science/api/pith-number/PG4VPBKMZVNSGAP2MWS7JCXCPF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF/action/storage_attestation","attest_author":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF/action/author_attestation","sign_citation":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF/action/citation_signature","submit_replication":"https://pith.science/pith/PG4VPBKMZVNSGAP2MWS7JCXCPF/action/replication_record"}},"created_at":"2026-05-17T23:38:49.235719+00:00","updated_at":"2026-05-17T23:38:49.235719+00:00"}