{"work":{"id":"5bb4e5d7-985e-431b-bb0d-75576cdc2950","openalex_id":null,"doi":null,"arxiv_id":"2508.06471","raw_key":null,"title":"GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models","authors":null,"authors_text":"GLM-4.5 Team: Aohan Zeng, Xin Lv, Qinkai Zheng, Zhenyu Hou, Bin Chen, Chengxing Xie","year":2025,"venue":"cs.CL","abstract":"We present GLM-4.5, an open-source Mixture-of-Experts (MoE) large language model with 355B total parameters and 32B activated parameters, featuring a hybrid reasoning method that supports both thinking and direct response modes. Through multi-stage training on 23T tokens and comprehensive post-training with expert model iteration and reinforcement learning, GLM-4.5 achieves strong performance across agentic, reasoning, and coding (ARC) tasks, scoring 70.1% on TAU-Bench, 91.0% on AIME 24, and 64.2% on SWE-bench Verified. With much fewer parameters than several competitors, GLM-4.5 ranks 3rd overall among all evaluated models and 2nd on agentic benchmarks. We release both GLM-4.5 (355B parameters) and a compact version, GLM-4.5-Air (106B parameters), to advance research in reasoning and agentic AI systems. Code, models, and more information are available at https://github.com/zai-org/GLM-4.5.","external_url":"https://arxiv.org/abs/2508.06471","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-18T01:00:34.533433+00:00","pith_arxiv_id":"2508.06471","created_at":"2026-05-09T06:40:42.306958+00:00","updated_at":"2026-05-18T01:00:34.533433+00:00","title_quality_ok":true,"display_title":"GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models","render_title":"GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models"},"hub":{"state":{"work_id":"5bb4e5d7-985e-431b-bb0d-75576cdc2950","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":69,"external_cited_by_count":null,"distinct_field_count":15,"first_pith_cited_at":"2025-09-02T17:44:45+00:00","last_pith_cited_at":"2026-05-14T06:37:55+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-18T04:00:23.503717+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":13},{"context_role":"baseline","n":3},{"context_role":"other","n":1}],"polarity_counts":[{"context_polarity":"background","n":12},{"context_polarity":"baseline","n":3},{"context_polarity":"unclear","n":2}],"runs":{"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T15:11:57.038960+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":28},{"title":"Kimi K2: Open Agentic Intelligence","work_id":"7f18284c-12d3-4137-bea1-1da97e8cf3c1","shared_citers":17},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":16},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":14},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":13},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":12},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":6},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":6},{"title":"Kimi K2.5: Visual Agentic Intelligence","work_id":"d690be8f-5d53-49b0-b1e7-79668eb8fcdb","shared_citers":6},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":6},{"title":"gpt-oss-120b & gpt-oss-20b Model Card","work_id":"178c1f7e-4f19-4392-a45d-45a6dfa88ead","shared_citers":5},{"title":"Group Sequence Policy Optimization","work_id":"3a98b53b-9f52-4d95-adf7-89353c0a9a65","shared_citers":5},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":5},{"title":"Browsecomp-zh: Benchmarking web browsing ability of large language models in chinese","work_id":"d2997896-a54e-43f5-80ac-d4d548116d21","shared_citers":4},{"title":"DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model","work_id":"1e1df141-cac8-47fd-b068-c4c96e51e331","shared_citers":4},{"title":"DeepSeek-V3.2: Pushing the Frontier of Open Large Language Models","work_id":"07c85cc5-4086-4abc-823b-6d0f4ff784d0","shared_citers":4},{"title":"GLM-4.5V and GLM-4.1V-Thinking: Towards Versatile Multimodal Reasoning with Scalable Reinforcement Learning","work_id":"366607ba-e4ea-4726-98c3-63356e32351c","shared_citers":4},{"title":"GLM-5: from Vibe Coding to Agentic Engineering","work_id":"ad29b1a2-bf77-46b3-9ead-fb62b1d2c6fe","shared_citers":4},{"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","work_id":"ea9e51ce-1e75-4182-92d8-4d25f70d2ee4","shared_citers":4},{"title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","work_id":"c888e6d1-0b1d-43d6-9ef5-f0912a0efa1b","shared_citers":4},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":4},{"title":"The Entropy Mechanism of Reinforcement Learning for Reasoning Language Models","work_id":"d4b4aee4-d20f-4572-886a-4ba9ea6c9b81","shared_citers":4},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":4},{"title":"BrowseComp: A Simple Yet Challenging Benchmark for Browsing Agents","work_id":"25adb508-d97c-49d6-ae43-7a70c2478a34","shared_citers":3}],"time_series":[{"n":2,"year":2025},{"n":46,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T15:11:52.087289+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T15:11:52.015404+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models","claims":[{"claim_text":"We present GLM-4.5, an open-source Mixture-of-Experts (MoE) large language model with 355B total parameters and 32B activated parameters, featuring a hybrid reasoning method that supports both thinking and direct response modes. Through multi-stage training on 23T tokens and comprehensive post-training with expert model iteration and reinforcement learning, GLM-4.5 achieves strong performance across agentic, reasoning, and coding (ARC) tasks, scoring 70.1% on TAU-Bench, 91.0% on AIME 24, and 64.2% on SWE-bench Verified. With much fewer parameters than several competitors, GLM-4.5 ranks 3rd ove","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T15:11:54.390418+00:00"}},"summary":{"title":"GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models","claims":[{"claim_text":"We present GLM-4.5, an open-source Mixture-of-Experts (MoE) large language model with 355B total parameters and 32B activated parameters, featuring a hybrid reasoning method that supports both thinking and direct response modes. Through multi-stage training on 23T tokens and comprehensive post-training with expert model iteration and reinforcement learning, GLM-4.5 achieves strong performance across agentic, reasoning, and coding (ARC) tasks, scoring 70.1% on TAU-Bench, 91.0% on AIME 24, and 64.2% on SWE-bench Verified. With much fewer parameters than several competitors, GLM-4.5 ranks 3rd ove","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":28},{"title":"Kimi K2: Open Agentic Intelligence","work_id":"7f18284c-12d3-4137-bea1-1da97e8cf3c1","shared_citers":17},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":16},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":14},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":13},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":12},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":6},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":6},{"title":"Kimi K2.5: Visual Agentic Intelligence","work_id":"d690be8f-5d53-49b0-b1e7-79668eb8fcdb","shared_citers":6},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":6},{"title":"gpt-oss-120b & gpt-oss-20b Model Card","work_id":"178c1f7e-4f19-4392-a45d-45a6dfa88ead","shared_citers":5},{"title":"Group Sequence Policy Optimization","work_id":"3a98b53b-9f52-4d95-adf7-89353c0a9a65","shared_citers":5},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":5},{"title":"Browsecomp-zh: Benchmarking web browsing ability of large language models in chinese","work_id":"d2997896-a54e-43f5-80ac-d4d548116d21","shared_citers":4},{"title":"DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model","work_id":"1e1df141-cac8-47fd-b068-c4c96e51e331","shared_citers":4},{"title":"DeepSeek-V3.2: Pushing the Frontier of Open Large Language Models","work_id":"07c85cc5-4086-4abc-823b-6d0f4ff784d0","shared_citers":4},{"title":"GLM-4.5V and GLM-4.1V-Thinking: Towards Versatile Multimodal Reasoning with Scalable Reinforcement Learning","work_id":"366607ba-e4ea-4726-98c3-63356e32351c","shared_citers":4},{"title":"GLM-5: from Vibe Coding to Agentic Engineering","work_id":"ad29b1a2-bf77-46b3-9ead-fb62b1d2c6fe","shared_citers":4},{"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","work_id":"ea9e51ce-1e75-4182-92d8-4d25f70d2ee4","shared_citers":4},{"title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","work_id":"c888e6d1-0b1d-43d6-9ef5-f0912a0efa1b","shared_citers":4},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":4},{"title":"The Entropy Mechanism of Reinforcement Learning for Reasoning Language Models","work_id":"d4b4aee4-d20f-4572-886a-4ba9ea6c9b81","shared_citers":4},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":4},{"title":"BrowseComp: A Simple Yet Challenging Benchmark for Browsing Agents","work_id":"25adb508-d97c-49d6-ae43-7a70c2478a34","shared_citers":3}],"time_series":[{"n":2,"year":2025},{"n":46,"year":2026}],"dependency_candidates":[]},"authors":[]}}