{"work":{"id":"d1cf6693-a082-403c-ada9-dac7b96341f9","openalex_id":null,"doi":null,"arxiv_id":"2201.11903","raw_key":null,"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","authors":null,"authors_text":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia","year":2022,"venue":"cs.CL","abstract":"We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. The empirical gains can be striking. For instance, prompting a 540B-parameter language model with just eight chain of thought exemplars achieves state of the art accuracy on the GSM8K benchmark of math word problems, surpassing even finetuned GPT-3 with a verifier.","external_url":"https://arxiv.org/abs/2201.11903","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T08:05:31.209263+00:00","pith_arxiv_id":"2201.11903","created_at":"2026-05-08T17:08:34.346014+00:00","updated_at":"2026-05-25T08:05:31.209263+00:00","title_quality_ok":true,"display_title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","render_title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models"},"hub":{"state":{"work_id":"d1cf6693-a082-403c-ada9-dac7b96341f9","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":240,"external_cited_by_count":null,"distinct_field_count":23,"first_pith_cited_at":"2022-02-25T17:25:19+00:00","last_pith_cited_at":"2026-05-22T03:32:52+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-03T09:05:52.646461+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":49},{"context_role":"method","n":13},{"context_role":"baseline","n":5}],"polarity_counts":[{"context_polarity":"background","n":43},{"context_polarity":"use_method","n":13},{"context_polarity":"baseline","n":5},{"context_polarity":"support","n":4},{"context_polarity":"unclear","n":2}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","claims":[{"claim_text":"We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. Th","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Chain-of-Thought Prompting Elicits Reasoning in Large Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T01:13:58.434336+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"cb0687c7-02a4-474c-a3bd-ba5a6e0d00f9","orcid":null,"display_name":"Jason Wei"},{"id":"7d44fff2-95d1-41fc-83c5-2969ba35b464","orcid":null,"display_name":"Xuezhi Wang"},{"id":"c5de7f09-6e32-4784-8fa7-9532f012735a","orcid":null,"display_name":"Dale Schuurmans"},{"id":"0778e3ce-2eaf-448e-8c23-5a30ba22c9fb","orcid":null,"display_name":"Maarten Bosma"},{"id":"9ddcb952-44b8-421e-b082-34b6c7ac1d39","orcid":null,"display_name":"Brian Ichter"},{"id":"bb693ea4-d2ec-4e98-abd9-408775f8657d","orcid":null,"display_name":"Fei Xia"}]},"error":null,"updated_at":"2026-05-14T01:14:13.785722+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T01:04:00.773676+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":30},{"title":"Self-Consistency Improves Chain of Thought Reasoning in Language Models","work_id":"8c6d5a6b-b5cc-4105-9c84-9c34bb9375bb","shared_citers":28},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":24},{"title":"ReAct: Synergizing Reasoning and Acting in Language Models","work_id":"407a2351-25f1-497d-b611-f77d0292a8e6","shared_citers":23},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":21},{"title":"Large Language Models are Zero-Shot Reasoners","work_id":"d9b7eb1a-7165-46ff-9f06-d2f0b9d6f95d","shared_citers":20},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":20},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":18},{"title":"Training language models to follow instructions with human feedback","work_id":"52aff42f-4fa9-4fcf-bdb3-1459b9bebf65","shared_citers":18},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":17},{"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","shared_citers":17},{"title":"Reflexion: Language Agents with Verbal Reinforcement Learning","work_id":"778f739e-5f55-4961-8a2a-e4736a2757f4","shared_citers":16},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":15},{"title":"LoRA: Low-Rank Adaptation of Large Language Models","work_id":"0426219a-789e-4964-adc8-a04538510818","shared_citers":14},{"title":"Measuring Mathematical Problem Solving With the MATH Dataset","work_id":"50652ac6-fb7c-4675-a2c2-159c241feb17","shared_citers":14},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":14},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":13},{"title":"Self-Refine: Iterative Refinement with Self-Feedback","work_id":"59181e7f-e58e-45d3-8146-4477a9f53d5a","shared_citers":13},{"title":"Solving Quantitative Reasoning Problems with Language Models","work_id":"17214d12-1ca8-4186-806d-53c6715383a0","shared_citers":13},{"title":"On the Opportunities and Risks of Foundation Models","work_id":"a18039e9-928d-47c9-a836-32656a71bf71","shared_citers":12},{"title":"Tree of Thoughts: Deliberate Problem Solving with Large Language Models","work_id":"07adb06e-4ed5-4ec5-a7ae-ff288fd214fb","shared_citers":12},{"title":"Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models","work_id":"bb63abb3-0d50-4362-b97c-b5e725b03b39","shared_citers":11},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":11},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":10}],"time_series":[{"n":6,"year":2022},{"n":16,"year":2023},{"n":3,"year":2024},{"n":1,"year":2025},{"n":94,"year":2026}]},"error":null,"updated_at":"2026-05-14T01:03:58.577032+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T01:03:55.282281+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","claims":[{"claim_text":"We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. Th","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Chain-of-Thought Prompting Elicits Reasoning in Large Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T01:04:03.085435+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","claims":[{"claim_text":"We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. Th","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Chain-of-Thought Prompting Elicits Reasoning in Large Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T01:03:57.810879+00:00"}},"summary":{"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","claims":[{"claim_text":"We explore how generating a chain of thought -- a series of intermediate reasoning steps -- significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. Th","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Chain-of-Thought Prompting Elicits Reasoning in Large Language Models because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":30},{"title":"Self-Consistency Improves Chain of Thought Reasoning in Language Models","work_id":"8c6d5a6b-b5cc-4105-9c84-9c34bb9375bb","shared_citers":28},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":24},{"title":"ReAct: Synergizing Reasoning and Acting in Language Models","work_id":"407a2351-25f1-497d-b611-f77d0292a8e6","shared_citers":23},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":21},{"title":"Large Language Models are Zero-Shot Reasoners","work_id":"d9b7eb1a-7165-46ff-9f06-d2f0b9d6f95d","shared_citers":20},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":20},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":18},{"title":"Training language models to follow instructions with human feedback","work_id":"52aff42f-4fa9-4fcf-bdb3-1459b9bebf65","shared_citers":18},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":17},{"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","shared_citers":17},{"title":"Reflexion: Language Agents with Verbal Reinforcement Learning","work_id":"778f739e-5f55-4961-8a2a-e4736a2757f4","shared_citers":16},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":15},{"title":"LoRA: Low-Rank Adaptation of Large Language Models","work_id":"0426219a-789e-4964-adc8-a04538510818","shared_citers":14},{"title":"Measuring Mathematical Problem Solving With the MATH Dataset","work_id":"50652ac6-fb7c-4675-a2c2-159c241feb17","shared_citers":14},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":14},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":13},{"title":"Self-Refine: Iterative Refinement with Self-Feedback","work_id":"59181e7f-e58e-45d3-8146-4477a9f53d5a","shared_citers":13},{"title":"Solving Quantitative Reasoning Problems with Language Models","work_id":"17214d12-1ca8-4186-806d-53c6715383a0","shared_citers":13},{"title":"On the Opportunities and Risks of Foundation Models","work_id":"a18039e9-928d-47c9-a836-32656a71bf71","shared_citers":12},{"title":"Tree of Thoughts: Deliberate Problem Solving with Large Language Models","work_id":"07adb06e-4ed5-4ec5-a7ae-ff288fd214fb","shared_citers":12},{"title":"Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models","work_id":"bb63abb3-0d50-4362-b97c-b5e725b03b39","shared_citers":11},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":11},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":10}],"time_series":[{"n":6,"year":2022},{"n":16,"year":2023},{"n":3,"year":2024},{"n":1,"year":2025},{"n":94,"year":2026}]},"authors":[{"id":"9ddcb952-44b8-421e-b082-34b6c7ac1d39","orcid":null,"display_name":"Brian Ichter","source":"manual","import_confidence":0.72},{"id":"c5de7f09-6e32-4784-8fa7-9532f012735a","orcid":null,"display_name":"Dale Schuurmans","source":"manual","import_confidence":0.72},{"id":"bb693ea4-d2ec-4e98-abd9-408775f8657d","orcid":null,"display_name":"Fei Xia","source":"manual","import_confidence":0.72},{"id":"cb0687c7-02a4-474c-a3bd-ba5a6e0d00f9","orcid":null,"display_name":"Jason Wei","source":"manual","import_confidence":0.72},{"id":"0778e3ce-2eaf-448e-8c23-5a30ba22c9fb","orcid":null,"display_name":"Maarten Bosma","source":"manual","import_confidence":0.72},{"id":"7d44fff2-95d1-41fc-83c5-2969ba35b464","orcid":null,"display_name":"Xuezhi Wang","source":"manual","import_confidence":0.72}]}}