{"work":{"id":"4a49f413-bca9-4de8-8620-97aa4cb099f3","openalex_id":null,"doi":null,"arxiv_id":"2402.17764","raw_key":null,"title":"The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits","authors":null,"authors_text":"Shuming Ma, Hongyu Wang, Lingxiao Ma, Lei Wang, Wenhui Wang, Shaohan Huang","year":2024,"venue":"cs.CL","abstract":"Recent research, such as BitNet, is paving the way for a new era of 1-bit Large Language Models (LLMs). In this work, we introduce a 1-bit LLM variant, namely BitNet b1.58, in which every single parameter (or weight) of the LLM is ternary {-1, 0, 1}. It matches the full-precision (i.e., FP16 or BF16) Transformer LLM with the same model size and training tokens in terms of both perplexity and end-task performance, while being significantly more cost-effective in terms of latency, memory, throughput, and energy consumption. More profoundly, the 1.58-bit LLM defines a new scaling law and recipe for training new generations of LLMs that are both high-performance and cost-effective. Furthermore, it enables a new computation paradigm and opens the door for designing specific hardware optimized for 1-bit LLMs.","external_url":"https://arxiv.org/abs/2402.17764","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-23T01:05:16.306329+00:00","pith_arxiv_id":"2402.17764","created_at":"2026-05-09T02:27:22.360808+00:00","updated_at":"2026-06-05T21:23:00.469572+00:00","title_quality_ok":true,"display_title":"The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits","render_title":"The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits"},"hub":{"state":{"work_id":"4a49f413-bca9-4de8-8620-97aa4cb099f3","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":26,"external_cited_by_count":null,"distinct_field_count":8,"first_pith_cited_at":"2025-03-11T09:41:29+00:00","last_pith_cited_at":"2026-05-21T07:27:22+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-06T05:20:19.471928+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":2},{"context_role":"dataset","n":1}],"polarity_counts":[{"context_polarity":"background","n":1},{"context_polarity":"unclear","n":1},{"context_polarity":"use_dataset","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}