{"work":{"id":"25e8bd3d-e51c-43ae-8126-4ea6ecdb3321","openalex_id":null,"doi":null,"arxiv_id":"2505.13211","raw_key":null,"title":"MAGI-1: Autoregressive Video Generation at Scale","authors":null,"authors_text":"Sand.ai, Hansi Teng, Hongyu Jia, Lei Sun, Lingzhi Li, Maolin Li","year":2025,"venue":"cs.CV","abstract":"We present MAGI-1, a world model that generates videos by autoregressively predicting a sequence of video chunks, defined as fixed-length segments of consecutive frames. Trained to denoise per-chunk noise that increases monotonically over time, MAGI-1 enables causal temporal modeling and naturally supports streaming generation. It achieves strong performance on image-to-video (I2V) tasks conditioned on text instructions, providing high temporal consistency and scalability, which are made possible by several algorithmic innovations and a dedicated infrastructure stack. MAGI-1 facilitates controllable generation via chunk-wise prompting and supports real-time, memory-efficient deployment by maintaining constant peak inference cost, regardless of video length. The largest variant of MAGI-1 comprises 24 billion parameters and supports context lengths of up to 4 million tokens, demonstrating the scalability and robustness of our approach. The code and models are available at https://github.com/SandAI-org/MAGI-1 and https://github.com/SandAI-org/MagiAttention. The product can be accessed at https://sand.ai.","external_url":"https://arxiv.org/abs/2505.13211","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T04:40:23.653437+00:00","pith_arxiv_id":"2505.13211","created_at":"2026-05-09T06:40:39.122062+00:00","updated_at":"2026-05-25T04:40:23.653437+00:00","title_quality_ok":true,"display_title":"MAGI-1: Autoregressive Video Generation at Scale","render_title":"MAGI-1: Autoregressive Video Generation at Scale"},"hub":{"state":{"work_id":"25e8bd3d-e51c-43ae-8126-4ea6ecdb3321","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":55,"external_cited_by_count":null,"distinct_field_count":3,"first_pith_cited_at":"2025-08-18T15:28:53+00:00","last_pith_cited_at":"2026-05-22T14:51:22+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-26T04:36:15.462022+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":15},{"context_role":"baseline","n":2},{"context_role":"dataset","n":1},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":14},{"context_polarity":"baseline","n":2},{"context_polarity":"unclear","n":1},{"context_polarity":"use_dataset","n":1},{"context_polarity":"use_method","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}