{"work":{"id":"178c1f7e-4f19-4392-a45d-45a6dfa88ead","openalex_id":null,"doi":null,"arxiv_id":"2508.10925","raw_key":null,"title":"gpt-oss-120b & gpt-oss-20b Model Card","authors":null,"authors_text":"OpenAI: Sandhini Agarwal, Lama Ahmad, Jason Ai, Sam Altman, Andy Applebaum, Edwin Arbus","year":2025,"venue":"cs.CL","abstract":"We present gpt-oss-120b and gpt-oss-20b, two open-weight reasoning models that push the frontier of accuracy and inference cost. The models use an efficient mixture-of-expert transformer architecture and are trained using large-scale distillation and reinforcement learning. We optimize the models to have strong agentic capabilities (deep research browsing, python tool use, and support for developer-provided functions), all while using a rendered chat format that enables clear instruction following and role delineation. Both models achieve strong results on benchmarks ranging from mathematics, coding, and safety. We release the model weights, inference implementations, tool environments, and tokenizers under an Apache 2.0 license to enable broad use and further research.","external_url":"https://arxiv.org/abs/2508.10925","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T07:25:28.955369+00:00","pith_arxiv_id":"2508.10925","created_at":"2026-05-08T17:43:52.617253+00:00","updated_at":"2026-06-05T21:23:00.469572+00:00","title_quality_ok":false,"display_title":"gpt-oss-120b & gpt-oss-20b Model Card","render_title":"gpt-oss-120b & gpt-oss-20b Model Card"},"hub":{"state":{"work_id":"178c1f7e-4f19-4392-a45d-45a6dfa88ead","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":279,"external_cited_by_count":null,"distinct_field_count":29,"first_pith_cited_at":"2025-05-10T16:52:40+00:00","last_pith_cited_at":"2026-05-21T23:17:03+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-08T20:33:57.792093+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":33},{"context_role":"baseline","n":16},{"context_role":"method","n":16},{"context_role":"other","n":6},{"context_role":"dataset","n":5}],"polarity_counts":[{"context_polarity":"background","n":31},{"context_polarity":"baseline","n":16},{"context_polarity":"use_method","n":16},{"context_polarity":"unclear","n":7},{"context_polarity":"use_dataset","n":5},{"context_polarity":"support","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"gpt-oss-120b & gpt-oss-20b Model Card","claims":[{"claim_text":"We present gpt-oss-120b and gpt-oss-20b, two open-weight reasoning models that push the frontier of accuracy and inference cost. The models use an efficient mixture-of-expert transformer architecture and are trained using large-scale distillation and reinforcement learning. We optimize the models to have strong agentic capabilities (deep research browsing, python tool use, and support for developer-provided functions), all while using a rendered chat format that enables clear instruction following and role delineation. Both models achieve strong results on benchmarks ranging from mathematics, ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks gpt-oss-120b & gpt-oss-20b Model Card because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:39:52.222938+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"f5c7e000-d90f-4c5f-a2b5-d89acfc9b6df","orcid":null,"display_name":"OpenAI: Sandhini Agarwal"},{"id":"a6a44120-a496-4985-81c5-8dfed0f5086a","orcid":null,"display_name":"Lama Ahmad"},{"id":"ae2fc481-2022-4a99-8a1b-216fb3d6ed28","orcid":null,"display_name":"Jason Ai"},{"id":"34c50762-c94f-4277-ab7d-834e2f801eaf","orcid":null,"display_name":"Sam Altman"},{"id":"c4718257-7ec2-4343-ae57-239afe711aac","orcid":null,"display_name":"Andy Applebaum"},{"id":"80d70f65-aa81-4b7a-8e51-183629f25d26","orcid":null,"display_name":"Edwin Arbus"}]},"error":null,"updated_at":"2026-05-14T18:39:27.786863+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T18:39:51.178564+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":73},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":42},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":25},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":24},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":20},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":19},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":17},{"title":"DeepSeek-V3.2: Pushing the Frontier of Open Large Language Models","work_id":"07c85cc5-4086-4abc-823b-6d0f4ff784d0","shared_citers":15},{"title":"OpenAI GPT-5 System Card","work_id":"ca87689a-0d29-4476-b504-b65dbbb08af4","shared_citers":15},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":13},{"title":"Mixtral of Experts","work_id":"0de8c352-9daa-4e1e-8c7b-3d0dec69f369","shared_citers":13},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":12},{"title":"Gemma 3 Technical Report","work_id":"f93e08bf-9e96-409b-8ac6-b8385fd17fd7","shared_citers":11},{"title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer","work_id":"2c6b3f6d-54e4-4df7-baa7-475a490799af","shared_citers":11},{"title":"Kimi K2.5: Visual Agentic Intelligence","work_id":"d690be8f-5d53-49b0-b1e7-79668eb8fcdb","shared_citers":10},{"title":"Kimi K2: Open Agentic Intelligence","work_id":"7f18284c-12d3-4137-bea1-1da97e8cf3c1","shared_citers":10},{"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","work_id":"ea9e51ce-1e75-4182-92d8-4d25f70d2ee4","shared_citers":10},{"title":"DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model","work_id":"1e1df141-cac8-47fd-b068-c4c96e51e331","shared_citers":9},{"title":"Olmo 3","work_id":"74de5f5e-0a69-4f73-862d-e5705fa9f4bb","shared_citers":9},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":9},{"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","shared_citers":9},{"title":"Universal and Transferable Adversarial Attacks on Aligned Language Models","work_id":"3322fa86-1768-4677-8425-dd326b45e078","shared_citers":9},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":8},{"title":"Humanity's Last Exam","work_id":"59ea00d4-16a8-45e1-aafc-290a6f91d9f4","shared_citers":8}],"time_series":[{"n":1,"year":2025},{"n":159,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T18:39:32.552363+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T18:39:26.348509+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"gpt-oss-120b & gpt-oss-20b Model Card","claims":[{"claim_text":"We present gpt-oss-120b and gpt-oss-20b, two open-weight reasoning models that push the frontier of accuracy and inference cost. The models use an efficient mixture-of-expert transformer architecture and are trained using large-scale distillation and reinforcement learning. We optimize the models to have strong agentic capabilities (deep research browsing, python tool use, and support for developer-provided functions), all while using a rendered chat format that enables clear instruction following and role delineation. Both models achieve strong results on benchmarks ranging from mathematics, ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks gpt-oss-120b & gpt-oss-20b Model Card because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:39:43.385966+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"gpt-oss-120b & gpt-oss-20b Model Card","claims":[{"claim_text":"We present gpt-oss-120b and gpt-oss-20b, two open-weight reasoning models that push the frontier of accuracy and inference cost. The models use an efficient mixture-of-expert transformer architecture and are trained using large-scale distillation and reinforcement learning. We optimize the models to have strong agentic capabilities (deep research browsing, python tool use, and support for developer-provided functions), all while using a rendered chat format that enables clear instruction following and role delineation. Both models achieve strong results on benchmarks ranging from mathematics, ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks gpt-oss-120b & gpt-oss-20b Model Card because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:39:32.448699+00:00"}},"summary":{"title":"gpt-oss-120b & gpt-oss-20b Model Card","claims":[{"claim_text":"We present gpt-oss-120b and gpt-oss-20b, two open-weight reasoning models that push the frontier of accuracy and inference cost. The models use an efficient mixture-of-expert transformer architecture and are trained using large-scale distillation and reinforcement learning. We optimize the models to have strong agentic capabilities (deep research browsing, python tool use, and support for developer-provided functions), all while using a rendered chat format that enables clear instruction following and role delineation. Both models achieve strong results on benchmarks ranging from mathematics, ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks gpt-oss-120b & gpt-oss-20b Model Card because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":73},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":42},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":25},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":24},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":20},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":19},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":17},{"title":"DeepSeek-V3.2: Pushing the Frontier of Open Large Language Models","work_id":"07c85cc5-4086-4abc-823b-6d0f4ff784d0","shared_citers":15},{"title":"OpenAI GPT-5 System Card","work_id":"ca87689a-0d29-4476-b504-b65dbbb08af4","shared_citers":15},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":13},{"title":"Mixtral of Experts","work_id":"0de8c352-9daa-4e1e-8c7b-3d0dec69f369","shared_citers":13},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":12},{"title":"Gemma 3 Technical Report","work_id":"f93e08bf-9e96-409b-8ac6-b8385fd17fd7","shared_citers":11},{"title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer","work_id":"2c6b3f6d-54e4-4df7-baa7-475a490799af","shared_citers":11},{"title":"Kimi K2.5: Visual Agentic Intelligence","work_id":"d690be8f-5d53-49b0-b1e7-79668eb8fcdb","shared_citers":10},{"title":"Kimi K2: Open Agentic Intelligence","work_id":"7f18284c-12d3-4137-bea1-1da97e8cf3c1","shared_citers":10},{"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","work_id":"ea9e51ce-1e75-4182-92d8-4d25f70d2ee4","shared_citers":10},{"title":"DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model","work_id":"1e1df141-cac8-47fd-b068-c4c96e51e331","shared_citers":9},{"title":"Olmo 3","work_id":"74de5f5e-0a69-4f73-862d-e5705fa9f4bb","shared_citers":9},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":9},{"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","shared_citers":9},{"title":"Universal and Transferable Adversarial Attacks on Aligned Language Models","work_id":"3322fa86-1768-4677-8425-dd326b45e078","shared_citers":9},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":8},{"title":"Humanity's Last Exam","work_id":"59ea00d4-16a8-45e1-aafc-290a6f91d9f4","shared_citers":8}],"time_series":[{"n":1,"year":2025},{"n":159,"year":2026}],"dependency_candidates":[]},"authors":[{"id":"c4718257-7ec2-4343-ae57-239afe711aac","orcid":null,"display_name":"Andy Applebaum","source":"manual","import_confidence":0.72},{"id":"80d70f65-aa81-4b7a-8e51-183629f25d26","orcid":null,"display_name":"Edwin Arbus","source":"manual","import_confidence":0.72},{"id":"ae2fc481-2022-4a99-8a1b-216fb3d6ed28","orcid":null,"display_name":"Jason Ai","source":"manual","import_confidence":0.72},{"id":"a6a44120-a496-4985-81c5-8dfed0f5086a","orcid":null,"display_name":"Lama Ahmad","source":"manual","import_confidence":0.72},{"id":"f5c7e000-d90f-4c5f-a2b5-d89acfc9b6df","orcid":null,"display_name":"OpenAI: Sandhini Agarwal","source":"manual","import_confidence":0.72},{"id":"34c50762-c94f-4277-ab7d-834e2f801eaf","orcid":null,"display_name":"Sam Altman","source":"manual","import_confidence":0.72}]}}