{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:QZPNLERNGAKNBUDI22PIJ77TFS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c17ad5bd13adb1c24b6da393fcd423d7782deb1152195b6ec0860f51eaf91b91","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","title_canon_sha256":"e1b6e9a101ccbe0c56a2ef0ef7c6625e447d26efc29afa6fac4b14d44e852264"},"schema_version":"1.0","source":{"id":"1911.08265","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1911.08265","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"1911.08265v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1911.08265","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"QZPNLERNGAKN","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"QZPNLERNGAKNBUDI","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"QZPNLERN","created_at":"2026-05-18T12:33:27Z"}],"graph_snapshots":[{"event_id":"sha256:bb0780c7d5b68eb778a310ca7985fe5eac2121a9b8cd55cf0ac9dc310b4df1a3","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"MuZero achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their underlying dynamics."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the learned model, when applied iteratively inside tree search, produces sufficiently accurate long-horizon predictions of reward, policy, and value to support effective planning even when the true dynamics are unknown and high-dimensional."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"MuZero matches or exceeds AlphaZero-level performance in Go, Chess, Shogi and sets a new state of the art on 57 Atari games by learning a model that directly supports planning rather than reconstructing full environment dynamics."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning."}],"snapshot_sha256":"fc5d4d70e58143e2eb08613abb5d8fb5234c20e7280a7423dcc820c749673d35"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"01cc8924ba4b674bba76f2d749503c125b742dbd28c604fa0de38be27d3fb9d8"},"paper":{"abstract_excerpt":"Constructing agents with planning capabilities has long been one of the main challenges in the pursuit of artificial intelligence. Tree-based planning methods have enjoyed huge success in challenging domains, such as chess and Go, where a perfect simulator is available. However, in real-world problems the dynamics governing the environment are often complex and unknown. In this work we present the MuZero algorithm which, by combining a tree-based search with a learned model, achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their u","authors_text":"Arthur Guez, David Silver, Demis Hassabis, Edward Lockhart, Ioannis Antonoglou, Julian Schrittwieser, Karen Simonyan, Laurent Sifre, Simon Schmitt, Thomas Hubert, Thore Graepel, Timothy Lillicrap","cross_cats":["stat.ML"],"headline":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","title":"Mastering Atari, Go, Chess and Shogi by Planning with a Learned Model"},"references":{"count":53,"internal_anchors":6,"resolved_work":53,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Lipton, and Animashree Anandkumar","work_id":"89d8a872-e25f-4e79-971e-9aad2c2d136a","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"The arcade learning environment: An evaluation platform for general agents","work_id":"dd383516-d2cf-40d5-b95c-99ff0ca6f83d","year":2013},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Superhuman ai for heads-up no-limit poker: Libratus beats top profes- sionals","work_id":"2356edfc-3c56-477c-848e-709319c2218b","year":2018},{"cited_arxiv_id":"1802.03006","doi":"","is_internal_anchor":true,"ref_index":4,"title":"Learning and Querying Fast Generative Models for Reinforcement Learning","work_id":"45700551-6f99-4914-b123-083e4ac20e0a","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Joseph Hoane, Jr., and Feng-hsiung Hsu","work_id":"313124b1-b65d-4318-85e2-cb28a84a6476","year":2002}],"snapshot_sha256":"fd65d6b50c28d5f2436bc2689d847b03db653ada1b97a74fc8a57871844fecde"},"source":{"id":"1911.08265","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T23:52:41.366359Z","id":"d05ceb39-5633-41b4-9a1c-ba680c1e5d23","model_set":{"reader":"grok-4.3"},"one_line_summary":"MuZero matches or exceeds AlphaZero-level performance in Go, Chess, Shogi and sets a new state of the art on 57 Atari games by learning a model that directly supports planning rather than reconstructing full environment dynamics.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning.","strongest_claim":"MuZero achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their underlying dynamics.","weakest_assumption":"That the learned model, when applied iteratively inside tree search, produces sufficiently accurate long-horizon predictions of reward, policy, and value to support effective planning even when the true dynamics are unknown and high-dimensional."}},"verdict_id":"d05ceb39-5633-41b4-9a1c-ba680c1e5d23"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3d94405646cfe71fa1cfab56f667705c9df9ac3dbc2898e4def7064852b76f54","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c17ad5bd13adb1c24b6da393fcd423d7782deb1152195b6ec0860f51eaf91b91","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","title_canon_sha256":"e1b6e9a101ccbe0c56a2ef0ef7c6625e447d26efc29afa6fac4b14d44e852264"},"schema_version":"1.0","source":{"id":"1911.08265","kind":"arxiv","version":2}},"canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","first_computed_at":"2026-05-17T23:38:46.177763Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.177763Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+EMevCl0ypFTcszIMVW0TtD9iSewfXpt/Q6rCryVm17+Q7ZxdTrG9hmcEIZjbYb4Z7Yg6eNzVvpMCw6q6feTCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.178211Z","signed_message":"canonical_sha256_bytes"},"source_id":"1911.08265","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3d94405646cfe71fa1cfab56f667705c9df9ac3dbc2898e4def7064852b76f54","sha256:bb0780c7d5b68eb778a310ca7985fe5eac2121a9b8cd55cf0ac9dc310b4df1a3"],"state_sha256":"c32c8ed2a75c3b871b6bd9ba9b371928a77ed1b0104977ef114e8878d9ad4151"}