{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2020:5AKCLSOEPX7ZECUB2MCPVE5A72","short_pith_number":"pith:5AKCLSOE","schema_version":"1.0","canonical_sha256":"e81425c9c47dff920a81d304fa93a0febb85398e4b4caafbd2e82996b0336154","source":{"kind":"arxiv","id":"2010.02193","version":4},"attestation_state":"computed","paper":{"title":"Mastering Atari with Discrete World Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"DreamerV2 achieves human-level performance on Atari by learning behaviors inside a separately trained discrete world model.","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Danijar Hafner, Jimmy Ba, Mohammad Norouzi, Timothy Lillicrap","submitted_at":"2020-10-05T17:52:14Z","abstract_excerpt":"Intelligent agents need to generalize from past experience to achieve goals in complex environments. World models facilitate such generalization and allow learning behaviors from imagined outcomes to increase sample-efficiency. While learning world models from image inputs has recently become feasible for some tasks, modeling Atari games accurately enough to derive successful behaviors has remained an open challenge for many years. We introduce DreamerV2, a reinforcement learning agent that learns behaviors purely from predictions in the compact latent space of a powerful world model. The worl"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2010.02193","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-10-05T17:52:14Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"603fc9f15cf6bed973e38f100f387b0308723a9449bd143904c5590fc7ef33ee","abstract_canon_sha256":"5ce5127337c9fa31dec90f012feca927d55fef23576a1fc97d5066dd02ea866b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:05.134101Z","signature_b64":"vGPzpNnN3TkpspXRn7MDCdIhFa9SU2Y9qDeeuKoivHYhViFrc12UIeuMtsMcos0aD4Qbwq7YeZl/izg/vGo/BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e81425c9c47dff920a81d304fa93a0febb85398e4b4caafbd2e82996b0336154","last_reissued_at":"2026-05-17T23:39:05.133505Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:05.133505Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mastering Atari with Discrete World Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"DreamerV2 achieves human-level performance on Atari by learning behaviors inside a separately trained discrete world model.","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Danijar Hafner, Jimmy Ba, Mohammad Norouzi, Timothy Lillicrap","submitted_at":"2020-10-05T17:52:14Z","abstract_excerpt":"Intelligent agents need to generalize from past experience to achieve goals in complex environments. World models facilitate such generalization and allow learning behaviors from imagined outcomes to increase sample-efficiency. While learning world models from image inputs has recently become feasible for some tasks, modeling Atari games accurately enough to derive successful behaviors has remained an open challenge for many years. We introduce DreamerV2, a reinforcement learning agent that learns behaviors purely from predictions in the compact latent space of a powerful world model. The worl"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"DreamerV2 constitutes the first agent that achieves human-level performance on the Atari benchmark of 55 tasks by learning behaviors inside a separately trained world model.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the learned discrete world model remains sufficiently accurate over the multi-step imagined trajectories used for policy optimization, without compounding errors that would invalidate the imagined returns.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"DreamerV2 reaches human-level performance on 55 Atari games by learning behaviors inside a separately trained discrete-latent world model.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"DreamerV2 achieves human-level performance on Atari by learning behaviors inside a separately trained discrete world model.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"b888d1f8367efcc45c8e083c9f04922fcda7e14e06a8085e45be447aaafce288"},"source":{"id":"2010.02193","kind":"arxiv","version":4},"verdict":{"id":"f5d18503-e20e-4ffc-8306-ea795f7035f4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T01:21:23.129242Z","strongest_claim":"DreamerV2 constitutes the first agent that achieves human-level performance on the Atari benchmark of 55 tasks by learning behaviors inside a separately trained world model.","one_line_summary":"DreamerV2 reaches human-level performance on 55 Atari games by learning behaviors inside a separately trained discrete-latent world model.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the learned discrete world model remains sufficiently accurate over the multi-step imagined trajectories used for policy optimization, without compounding errors that would invalidate the imagined returns.","pith_extraction_headline":"DreamerV2 achieves human-level performance on Atari by learning behaviors inside a separately trained discrete world model."},"references":{"count":56,"sample":[{"doi":"","year":null,"title":"H., and Levine, S","work_id":"2b4f01f7-2946-42ed-ad06-677913824304","ref_index":1,"cited_arxiv_id":"1710.11252","is_internal_anchor":true},{"doi":"","year":2003,"title":"Agent57: Outperforming the Atari Human Benchmark","work_id":"1ebaef4b-4a53-434a-96c4-8c645798ae71","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"A distributional perspective on rein- forcement learning.arXiv preprint arXiv:1707.06887","work_id":"79879eb4-5aa1-4764-9aaa-505a0a1c0f7f","ref_index":3,"cited_arxiv_id":"1707.06887","is_internal_anchor":true},{"doi":"","year":null,"title":"Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation","work_id":"1fe8c7c8-aff7-4b94-9096-e549d7e60789","ref_index":4,"cited_arxiv_id":"1308.3432","is_internal_anchor":true},{"doi":"","year":null,"title":"Learning and Querying Fast Generative Models for Reinforcement Learning","work_id":"45700551-6f99-4914-b123-083e4ac20e0a","ref_index":5,"cited_arxiv_id":"1802.03006","is_internal_anchor":true}],"resolved_work":56,"snapshot_sha256":"d506c9036b0f063443309964941bfe3c726c3800f8812ee2169e139c0a4de2c5","internal_anchors":41},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a861284b54472c82477bb4ead6f023fe9141056c0c6be6e432d72b653494f3ac"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2010.02193","created_at":"2026-05-17T23:39:05.133602+00:00"},{"alias_kind":"arxiv_version","alias_value":"2010.02193v4","created_at":"2026-05-17T23:39:05.133602+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2010.02193","created_at":"2026-05-17T23:39:05.133602+00:00"},{"alias_kind":"pith_short_12","alias_value":"5AKCLSOEPX7Z","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"5AKCLSOEPX7ZECUB","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"5AKCLSOE","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":44,"internal_anchor_count":44,"sample":[{"citing_arxiv_id":"2506.14135","citing_title":"GAF: Gaussian Action Field as a 4D Representation for Dynamic World Modeling in Robotic Manipulation","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2412.12870","citing_title":"Physically Interpretable World Models via Weakly Supervised Representation Learning","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21800","citing_title":"stable-worldmodel: A Platform for Reproducible World Modeling Research and Evaluation","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19376","citing_title":"Generative Recursive Reasoning","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16030","citing_title":"Mind Dreamer: Untethering Imagination via Active Causal Intervention on Latent Manifolds","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15618","citing_title":"Latent Video Prediction Learns Better World Models","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19376","citing_title":"Generative Recursive Reasoning","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15256","citing_title":"ReactiveGWM: Steering NPC in Reactive Game World Models","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15477","citing_title":"EgoExo-WM: Unlocking Exo Video for Ego World Models","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2505.21996","citing_title":"VRAG: Learning World Models for Interactive Video Generation","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2506.05762","citing_title":"BiTrajDiff: Bidirectional Trajectory Generation with Diffusion Models for Offline Reinforcement Learning","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2106.01345","citing_title":"Decision Transformer: Reinforcement Learning via Sequence Modeling","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2509.20869","citing_title":"Model-Based Reinforcement Learning under Random Observation Delays","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2509.19538","citing_title":"DAWM: Diffusion Action World Models for Offline Reinforcement Learning via Action-Inferred Transitions","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2509.24948","citing_title":"World-Env: Leveraging World Model as a Virtual Environment for VLA Post-Training","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2411.04983","citing_title":"DINO-WM: World Models on Pre-trained Visual Features enable Zero-shot Planning","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2505.15659","citing_title":"FLARE: Robot Learning with Implicit World Modeling","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2305.14992","citing_title":"Reasoning with Language Model is Planning with World Model","ref_index":151,"is_internal_anchor":true},{"citing_arxiv_id":"2309.16797","citing_title":"Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution","ref_index":134,"is_internal_anchor":true},{"citing_arxiv_id":"2310.06114","citing_title":"Learning Interactive Real-World Simulators","ref_index":246,"is_internal_anchor":true},{"citing_arxiv_id":"2510.10125","citing_title":"Ctrl-World: A Controllable Generative World Model for Robot Manipulation","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2603.19312","citing_title":"LeWorldModel: Stable End-to-End Joint-Embedding Predictive Architecture from Pixels","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2509.24527","citing_title":"Training Agents Inside of Scalable World Models","ref_index":66,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13013","citing_title":"JEDI: Joint Embedding Diffusion World Model for Online Model-Based Reinforcement Learning","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04974","citing_title":"From Video to Control: A Survey of Learning Manipulation Interfaces from Temporal Visual Data","ref_index":41,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72","json":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72.json","graph_json":"https://pith.science/api/pith-number/5AKCLSOEPX7ZECUB2MCPVE5A72/graph.json","events_json":"https://pith.science/api/pith-number/5AKCLSOEPX7ZECUB2MCPVE5A72/events.json","paper":"https://pith.science/paper/5AKCLSOE"},"agent_actions":{"view_html":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72","download_json":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72.json","view_paper":"https://pith.science/paper/5AKCLSOE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2010.02193&json=true","fetch_graph":"https://pith.science/api/pith-number/5AKCLSOEPX7ZECUB2MCPVE5A72/graph.json","fetch_events":"https://pith.science/api/pith-number/5AKCLSOEPX7ZECUB2MCPVE5A72/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72/action/storage_attestation","attest_author":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72/action/author_attestation","sign_citation":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72/action/citation_signature","submit_replication":"https://pith.science/pith/5AKCLSOEPX7ZECUB2MCPVE5A72/action/replication_record"}},"created_at":"2026-05-17T23:39:05.133602+00:00","updated_at":"2026-05-17T23:39:05.133602+00:00"}