{"work":{"id":"b687fcea-fa95-4504-9ab2-8a627cae0000","openalex_id":null,"doi":null,"arxiv_id":"2102.12092","raw_key":null,"title":"Zero-Shot Text-to-Image Generation","authors":null,"authors_text":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford","year":2021,"venue":"cs.CV","abstract":"Text-to-image generation has traditionally focused on finding better modeling assumptions for training on a fixed dataset. These assumptions might involve complex architectures, auxiliary losses, or side information such as object part labels or segmentation masks supplied during training. We describe a simple approach for this task based on a transformer that autoregressively models the text and image tokens as a single stream of data. With sufficient data and scale, our approach is competitive with previous domain-specific models when evaluated in a zero-shot fashion.","external_url":"https://arxiv.org/abs/2102.12092","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-24T11:09:22.459881+00:00","pith_arxiv_id":"2102.12092","created_at":"2026-05-10T16:55:57.861171+00:00","updated_at":"2026-06-05T21:23:00.469572+00:00","title_quality_ok":true,"display_title":"Zero-Shot Text-to-Image Generation","render_title":"Zero-Shot Text-to-Image Generation"},"hub":{"state":{"work_id":"b687fcea-fa95-4504-9ab2-8a627cae0000","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":33,"external_cited_by_count":null,"distinct_field_count":7,"first_pith_cited_at":"2021-04-20T17:58:03+00:00","last_pith_cited_at":"2026-05-20T08:01:45+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-07T15:52:05.637077+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":8},{"context_role":"method","n":2}],"polarity_counts":[{"context_polarity":"background","n":7},{"context_polarity":"use_method","n":2},{"context_polarity":"unclear","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}