{"work":{"id":"5103f4be-344a-4139-8504-eaa59f5bac9d","openalex_id":null,"doi":null,"arxiv_id":"1912.01603","raw_key":null,"title":"Dream to Control: Learning Behaviors by Latent Imagination","authors":null,"authors_text":"Danijar Hafner, Timothy Lillicrap, Jimmy Ba, Mohammad Norouzi","year":2019,"venue":"cs.LG","abstract":"Learned world models summarize an agent's experience to facilitate learning complex behaviors. While learning world models from high-dimensional sensory inputs is becoming feasible through deep learning, there are many potential ways for deriving behaviors from them. We present Dreamer, a reinforcement learning agent that solves long-horizon tasks from images purely by latent imagination. We efficiently learn behaviors by propagating analytic gradients of learned state values back through trajectories imagined in the compact state space of a learned world model. On 20 challenging visual control tasks, Dreamer exceeds existing approaches in data-efficiency, computation time, and final performance.","external_url":"https://arxiv.org/abs/1912.01603","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T07:50:29.217783+00:00","pith_arxiv_id":"1912.01603","created_at":"2026-05-10T06:11:20.800695+00:00","updated_at":"2026-05-25T07:50:29.217783+00:00","title_quality_ok":true,"display_title":"Dream to Control: Learning Behaviors by Latent Imagination","render_title":"Dream to Control: Learning Behaviors by Latent Imagination"},"hub":{"state":{"work_id":"5103f4be-344a-4139-8504-eaa59f5bac9d","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":71,"external_cited_by_count":null,"distinct_field_count":8,"first_pith_cited_at":"2020-10-05T17:52:14+00:00","last_pith_cited_at":"2026-05-21T08:34:57+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-31T21:32:49.146245+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":20},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":20},{"context_polarity":"use_method","n":1}],"runs":{"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T18:09:36.146799+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Mastering Diverse Domains through World Models","work_id":"6aeb260f-8c7c-4f9c-b98b-067cd7c59acd","shared_citers":17},{"title":"World Models","work_id":"07227eee-8445-4c98-bce4-c6a6fd5ed907","shared_citers":16},{"title":"OpenVLA: An Open-Source Vision-Language-Action Model","work_id":"3e7e65c5-5aed-4fe9-8414-2092bcb31cc7","shared_citers":12},{"title":"$\\pi_0$: A Vision-Language-Action Flow Model for General Robot Control","work_id":"f790abdc-a796-482f-a40d-f8ee035ecfc2","shared_citers":10},{"title":"//arxiv.org/abs/2010.02193","work_id":"154f6f5f-bb34-456d-8107-45d5b51433ce","shared_citers":10},{"title":"$\\pi_{0.5}$: a Vision-Language-Action Model with Open-World Generalization","work_id":"d1ad7304-d09a-49bc-809e-846439f6aff9","shared_citers":9},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":9},{"title":"Auto-Encoding Variational Bayes","work_id":"97d95295-30e1-42b4-bbf6-85f0fa4edb44","shared_citers":7},{"title":"Cosmos World Foundation Model Platform for Physical AI","work_id":"a2dba24c-318d-476a-8b21-4289c265810c","shared_citers":7},{"title":"Revisiting Feature Prediction for Learning Visual Representations from Video","work_id":"f7251dcf-5341-4915-bfe7-27812387b61a","shared_citers":7},{"title":"V-JEPA 2: Self-Supervised Video Models Enable Understanding, Prediction and Planning","work_id":"a9c28401-f16a-4933-89f0-788e2f94e52b","shared_citers":7},{"title":"//arxiv.org/abs/1811.04551","work_id":"146fc4a4-6db2-43a9-a57f-4dd133c0d315","shared_citers":6},{"title":"CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer","work_id":"f38fc088-12aa-4bf4-9ecd-08d3e797ccb7","shared_citers":6},{"title":"Fine-Tuning Vision-Language-Action Models: Optimizing Speed and Success","work_id":"04f46bb3-4346-47e8-bf09-c75d91f96e87","shared_citers":6},{"title":"Flow Matching for Generative Modeling","work_id":"6edb71c4-5d64-40af-a394-9757ea051a36","shared_citers":6},{"title":"GAIA-1: A Generative World Model for Autonomous Driving","work_id":"313484e6-a442-4522-8e19-d07e502844a8","shared_citers":6},{"title":"GR00T N1: An Open Foundation Model for Generalist Humanoid Robots","work_id":"e2db69c7-ee8a-4cb7-a761-7b8de1dfcf97","shared_citers":6},{"title":"RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control","work_id":"ff438a8a-8003-4fae-9131-acd418b3597b","shared_citers":6},{"title":"TD-MPC2: Scalable, Robust World Models for Continuous Control","work_id":"360ec5fb-79fd-4490-bc73-3d161609c42d","shared_citers":6},{"title":"Wan: Open and Advanced Large-Scale Video Generative Models","work_id":"ad3ebc3b-4224-46c9-b61d-bcf135da0a7c","shared_citers":6},{"title":"DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset","work_id":"13253de2-3d89-415c-8c2f-3adb25d4c337","shared_citers":5},{"title":"Genie envisioner: A unified world foundation platform for robotic manipulation","work_id":"440ad435-44ba-4acd-9aeb-21dd3ee04835","shared_citers":5},{"title":"Kaiser, M","work_id":"edc1a23e-c421-4569-ab9e-83b204eeb0fa","shared_citers":5},{"title":"RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots","work_id":"11232b35-bd17-402a-9234-951c46015815","shared_citers":5}],"time_series":[{"n":1,"year":2023},{"n":3,"year":2025},{"n":32,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T18:09:45.755247+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T18:10:00.322398+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Dream to Control: Learning Behaviors by Latent Imagination","claims":[{"claim_text":"Learned world models summarize an agent's experience to facilitate learning complex behaviors. While learning world models from high-dimensional sensory inputs is becoming feasible through deep learning, there are many potential ways for deriving behaviors from them. We present Dreamer, a reinforcement learning agent that solves long-horizon tasks from images purely by latent imagination. We efficiently learn behaviors by propagating analytic gradients of learned state values back through trajectories imagined in the compact state space of a learned world model. On 20 challenging visual contro","claim_type":"abstract","evidence_strength":"source_metadata"},{"claim_text":"Langugae-Conditoned MoCoGAN [29], U-Net [30], Latte [ 31], Wan [32], Sora 2 [ 33]. . . Embodied World Model SWIM [34], DreamDojo [ 35], RoboDreamer [36], RoboScape [37]. . . WM for VLA Imitation Learning Ctrl-World [38], RoboScape [37], DREMA [ 39] Reinforcement Learning Dreamer to Control [ 40] DreamerV2 [ 41], Dreamer 4 [ 42], RISE [ 43] DreamerV3 [44], DayDreamer [45], World-Env [46], RoboScape-R [47] WMPO [48], WoVR [49], VLA-RFT [50], RWML [51], MoDem-V2 [52] World-Gymnast [53], RWM-U [54],","claim_type":"background","confidence":0.85,"evidence_strength":"citation_context"}],"why_cited":"Pith tracks Dream to Control: Learning Behaviors by Latent Imagination because it crossed a citation-hub threshold. Current citing contexts most often use it as background evidence (1 contexts).","role_counts":[{"n":1,"context_role":"background"}]},"error":null,"updated_at":"2026-05-14T18:09:36.158989+00:00"}},"summary":{"title":"Dream to Control: Learning Behaviors by Latent Imagination","claims":[{"claim_text":"Learned world models summarize an agent's experience to facilitate learning complex behaviors. While learning world models from high-dimensional sensory inputs is becoming feasible through deep learning, there are many potential ways for deriving behaviors from them. We present Dreamer, a reinforcement learning agent that solves long-horizon tasks from images purely by latent imagination. We efficiently learn behaviors by propagating analytic gradients of learned state values back through trajectories imagined in the compact state space of a learned world model. On 20 challenging visual contro","claim_type":"abstract","evidence_strength":"source_metadata"},{"claim_text":"Langugae-Conditoned MoCoGAN [29], U-Net [30], Latte [ 31], Wan [32], Sora 2 [ 33]. . . Embodied World Model SWIM [34], DreamDojo [ 35], RoboDreamer [36], RoboScape [37]. . . WM for VLA Imitation Learning Ctrl-World [38], RoboScape [37], DREMA [ 39] Reinforcement Learning Dreamer to Control [ 40] DreamerV2 [ 41], Dreamer 4 [ 42], RISE [ 43] DreamerV3 [44], DayDreamer [45], World-Env [46], RoboScape-R [47] WMPO [48], WoVR [49], VLA-RFT [50], RWML [51], MoDem-V2 [52] World-Gymnast [53], RWM-U [54],","claim_type":"background","confidence":0.85,"evidence_strength":"citation_context"}],"why_cited":"Pith tracks Dream to Control: Learning Behaviors by Latent Imagination because it crossed a citation-hub threshold. Current citing contexts most often use it as background evidence (1 contexts).","role_counts":[{"n":1,"context_role":"background"}]},"graph":{"co_cited":[{"title":"Mastering Diverse Domains through World Models","work_id":"6aeb260f-8c7c-4f9c-b98b-067cd7c59acd","shared_citers":17},{"title":"World Models","work_id":"07227eee-8445-4c98-bce4-c6a6fd5ed907","shared_citers":16},{"title":"OpenVLA: An Open-Source Vision-Language-Action Model","work_id":"3e7e65c5-5aed-4fe9-8414-2092bcb31cc7","shared_citers":12},{"title":"$\\pi_0$: A Vision-Language-Action Flow Model for General Robot Control","work_id":"f790abdc-a796-482f-a40d-f8ee035ecfc2","shared_citers":10},{"title":"//arxiv.org/abs/2010.02193","work_id":"154f6f5f-bb34-456d-8107-45d5b51433ce","shared_citers":10},{"title":"$\\pi_{0.5}$: a Vision-Language-Action Model with Open-World Generalization","work_id":"d1ad7304-d09a-49bc-809e-846439f6aff9","shared_citers":9},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":9},{"title":"Auto-Encoding Variational Bayes","work_id":"97d95295-30e1-42b4-bbf6-85f0fa4edb44","shared_citers":7},{"title":"Cosmos World Foundation Model Platform for Physical AI","work_id":"a2dba24c-318d-476a-8b21-4289c265810c","shared_citers":7},{"title":"Revisiting Feature Prediction for Learning Visual Representations from Video","work_id":"f7251dcf-5341-4915-bfe7-27812387b61a","shared_citers":7},{"title":"V-JEPA 2: Self-Supervised Video Models Enable Understanding, Prediction and Planning","work_id":"a9c28401-f16a-4933-89f0-788e2f94e52b","shared_citers":7},{"title":"//arxiv.org/abs/1811.04551","work_id":"146fc4a4-6db2-43a9-a57f-4dd133c0d315","shared_citers":6},{"title":"CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer","work_id":"f38fc088-12aa-4bf4-9ecd-08d3e797ccb7","shared_citers":6},{"title":"Fine-Tuning Vision-Language-Action Models: Optimizing Speed and Success","work_id":"04f46bb3-4346-47e8-bf09-c75d91f96e87","shared_citers":6},{"title":"Flow Matching for Generative Modeling","work_id":"6edb71c4-5d64-40af-a394-9757ea051a36","shared_citers":6},{"title":"GAIA-1: A Generative World Model for Autonomous Driving","work_id":"313484e6-a442-4522-8e19-d07e502844a8","shared_citers":6},{"title":"GR00T N1: An Open Foundation Model for Generalist Humanoid Robots","work_id":"e2db69c7-ee8a-4cb7-a761-7b8de1dfcf97","shared_citers":6},{"title":"RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control","work_id":"ff438a8a-8003-4fae-9131-acd418b3597b","shared_citers":6},{"title":"TD-MPC2: Scalable, Robust World Models for Continuous Control","work_id":"360ec5fb-79fd-4490-bc73-3d161609c42d","shared_citers":6},{"title":"Wan: Open and Advanced Large-Scale Video Generative Models","work_id":"ad3ebc3b-4224-46c9-b61d-bcf135da0a7c","shared_citers":6},{"title":"DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset","work_id":"13253de2-3d89-415c-8c2f-3adb25d4c337","shared_citers":5},{"title":"Genie envisioner: A unified world foundation platform for robotic manipulation","work_id":"440ad435-44ba-4acd-9aeb-21dd3ee04835","shared_citers":5},{"title":"Kaiser, M","work_id":"edc1a23e-c421-4569-ab9e-83b204eeb0fa","shared_citers":5},{"title":"RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots","work_id":"11232b35-bd17-402a-9234-951c46015815","shared_citers":5}],"time_series":[{"n":1,"year":2023},{"n":3,"year":2025},{"n":32,"year":2026}],"dependency_candidates":[]},"authors":[]}}