{"work":{"id":"6830fb5e-ffe7-4200-8b19-a56a5152a37a","openalex_id":null,"doi":null,"arxiv_id":"2210.11610","raw_key":null,"title":"Large Language Models Can Self-Improve","authors":null,"authors_text":"Jiaxin Huang, Shixiang Shane Gu, Le Hou, Yuexin Wu, Xuezhi Wang, Hongkun Yu","year":2022,"venue":"cs.CL","abstract":"Large Language Models (LLMs) have achieved excellent performances in various tasks. However, fine-tuning an LLM requires extensive supervision. Human, on the other hand, may improve their reasoning abilities by self-thinking without external inputs. In this work, we demonstrate that an LLM is also capable of self-improving with only unlabeled datasets. We use a pre-trained LLM to generate \"high-confidence\" rationale-augmented answers for unlabeled questions using Chain-of-Thought prompting and self-consistency, and fine-tune the LLM using those self-generated solutions as target outputs. We show that our approach improves the general reasoning ability of a 540B-parameter LLM (74.4%->82.1% on GSM8K, 78.2%->83.0% on DROP, 90.0%->94.4% on OpenBookQA, and 63.4%->67.9% on ANLI-A3) and achieves state-of-the-art-level performance, without any ground truth label. \nWe conduct ablation studies and show that fine-tuning on reasoning is critical for self-improvement.","external_url":"https://arxiv.org/abs/2210.11610","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-24T09:34:16.995759+00:00","pith_arxiv_id":"2210.11610","created_at":"2026-05-10T05:41:01.930306+00:00","updated_at":"2026-05-24T09:34:16.995759+00:00","title_quality_ok":true,"display_title":"Large Language Models Can Self-Improve","render_title":"Large Language Models Can Self-Improve"},"hub":{"state":{"work_id":"6830fb5e-ffe7-4200-8b19-a56a5152a37a","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":23,"external_cited_by_count":null,"distinct_field_count":6,"first_pith_cited_at":"2023-02-23T14:02:47+00:00","last_pith_cited_at":"2026-05-16T22:52:11+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-02T05:44:21.770946+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":4},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":4},{"context_polarity":"use_method","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}