{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:QZPNLERNGAKNBUDI22PIJ77TFS","short_pith_number":"pith:QZPNLERN","canonical_record":{"source":{"id":"1911.08265","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"e1b6e9a101ccbe0c56a2ef0ef7c6625e447d26efc29afa6fac4b14d44e852264","abstract_canon_sha256":"c17ad5bd13adb1c24b6da393fcd423d7782deb1152195b6ec0860f51eaf91b91"},"schema_version":"1.0"},"canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","source":{"kind":"arxiv","id":"1911.08265","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1911.08265","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"1911.08265v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1911.08265","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"QZPNLERNGAKN","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"QZPNLERNGAKNBUDI","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"QZPNLERN","created_at":"2026-05-18T12:33:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:QZPNLERNGAKNBUDI22PIJ77TFS","target":"record","payload":{"canonical_record":{"source":{"id":"1911.08265","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"e1b6e9a101ccbe0c56a2ef0ef7c6625e447d26efc29afa6fac4b14d44e852264","abstract_canon_sha256":"c17ad5bd13adb1c24b6da393fcd423d7782deb1152195b6ec0860f51eaf91b91"},"schema_version":"1.0"},"canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.178211Z","signature_b64":"+EMevCl0ypFTcszIMVW0TtD9iSewfXpt/Q6rCryVm17+Q7ZxdTrG9hmcEIZjbYb4Z7Yg6eNzVvpMCw6q6feTCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","last_reissued_at":"2026-05-17T23:38:46.177763Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.177763Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1911.08265","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"e5jxz0ojXUh2q0dt1YzsulwPWbaQzquwvmXE7963auBAybZJp4g+XCvnOa0NXOOqrQdvFjPhSXBWZiKRID1MBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T23:01:07.531670Z"},"content_sha256":"3d94405646cfe71fa1cfab56f667705c9df9ac3dbc2898e4def7064852b76f54","schema_version":"1.0","event_id":"sha256:3d94405646cfe71fa1cfab56f667705c9df9ac3dbc2898e4def7064852b76f54"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:QZPNLERNGAKNBUDI22PIJ77TFS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Mastering Atari, Go, Chess and Shogi by Planning with a Learned Model","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning.","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Arthur Guez, David Silver, Demis Hassabis, Edward Lockhart, Ioannis Antonoglou, Julian Schrittwieser, Karen Simonyan, Laurent Sifre, Simon Schmitt, Thomas Hubert, Thore Graepel, Timothy Lillicrap","submitted_at":"2019-11-19T13:58:52Z","abstract_excerpt":"Constructing agents with planning capabilities has long been one of the main challenges in the pursuit of artificial intelligence. Tree-based planning methods have enjoyed huge success in challenging domains, such as chess and Go, where a perfect simulator is available. However, in real-world problems the dynamics governing the environment are often complex and unknown. In this work we present the MuZero algorithm which, by combining a tree-based search with a learned model, achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their u"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"MuZero achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their underlying dynamics.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the learned model, when applied iteratively inside tree search, produces sufficiently accurate long-horizon predictions of reward, policy, and value to support effective planning even when the true dynamics are unknown and high-dimensional.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"MuZero matches or exceeds AlphaZero-level performance in Go, Chess, Shogi and sets a new state of the art on 57 Atari games by learning a model that directly supports planning rather than reconstructing full environment dynamics.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"fc5d4d70e58143e2eb08613abb5d8fb5234c20e7280a7423dcc820c749673d35"},"source":{"id":"1911.08265","kind":"arxiv","version":2},"verdict":{"id":"d05ceb39-5633-41b4-9a1c-ba680c1e5d23","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T23:52:41.366359Z","strongest_claim":"MuZero achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their underlying dynamics.","one_line_summary":"MuZero matches or exceeds AlphaZero-level performance in Go, Chess, Shogi and sets a new state of the art on 57 Atari games by learning a model that directly supports planning rather than reconstructing full environment dynamics.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the learned model, when applied iteratively inside tree search, produces sufficiently accurate long-horizon predictions of reward, policy, and value to support effective planning even when the true dynamics are unknown and high-dimensional.","pith_extraction_headline":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning."},"references":{"count":53,"sample":[{"doi":"","year":2018,"title":"Lipton, and Animashree Anandkumar","work_id":"89d8a872-e25f-4e79-971e-9aad2c2d136a","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2013,"title":"The arcade learning environment: An evaluation platform for general agents","work_id":"dd383516-d2cf-40d5-b95c-99ff0ca6f83d","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Superhuman ai for heads-up no-limit poker: Libratus beats top profes- sionals","work_id":"2356edfc-3c56-477c-848e-709319c2218b","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Learning and Querying Fast Generative Models for Reinforcement Learning","work_id":"45700551-6f99-4914-b123-083e4ac20e0a","ref_index":4,"cited_arxiv_id":"1802.03006","is_internal_anchor":true},{"doi":"","year":2002,"title":"Joseph Hoane, Jr., and Feng-hsiung Hsu","work_id":"313124b1-b65d-4318-85e2-cb28a84a6476","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":53,"snapshot_sha256":"fd65d6b50c28d5f2436bc2689d847b03db653ada1b97a74fc8a57871844fecde","internal_anchors":6},"formal_canon":{"evidence_count":2,"snapshot_sha256":"01cc8924ba4b674bba76f2d749503c125b742dbd28c604fa0de38be27d3fb9d8"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"d05ceb39-5633-41b4-9a1c-ba680c1e5d23"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"92noKgzLCjzuLRHy42alGHrvzU+/wwowlqnd3femXSaWCag5TzFItomC3hLZ+aA3gieqU9A7OiUdmUYQvhQ+Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T23:01:07.532597Z"},"content_sha256":"bb0780c7d5b68eb778a310ca7985fe5eac2121a9b8cd55cf0ac9dc310b4df1a3","schema_version":"1.0","event_id":"sha256:bb0780c7d5b68eb778a310ca7985fe5eac2121a9b8cd55cf0ac9dc310b4df1a3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QZPNLERNGAKNBUDI22PIJ77TFS/bundle.json","state_url":"https://pith.science/pith/QZPNLERNGAKNBUDI22PIJ77TFS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QZPNLERNGAKNBUDI22PIJ77TFS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-20T23:01:07Z","links":{"resolver":"https://pith.science/pith/QZPNLERNGAKNBUDI22PIJ77TFS","bundle":"https://pith.science/pith/QZPNLERNGAKNBUDI22PIJ77TFS/bundle.json","state":"https://pith.science/pith/QZPNLERNGAKNBUDI22PIJ77TFS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QZPNLERNGAKNBUDI22PIJ77TFS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:QZPNLERNGAKNBUDI22PIJ77TFS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c17ad5bd13adb1c24b6da393fcd423d7782deb1152195b6ec0860f51eaf91b91","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","title_canon_sha256":"e1b6e9a101ccbe0c56a2ef0ef7c6625e447d26efc29afa6fac4b14d44e852264"},"schema_version":"1.0","source":{"id":"1911.08265","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1911.08265","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"1911.08265v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1911.08265","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"QZPNLERNGAKN","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"QZPNLERNGAKNBUDI","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"QZPNLERN","created_at":"2026-05-18T12:33:27Z"}],"graph_snapshots":[{"event_id":"sha256:bb0780c7d5b68eb778a310ca7985fe5eac2121a9b8cd55cf0ac9dc310b4df1a3","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"MuZero achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their underlying dynamics."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the learned model, when applied iteratively inside tree search, produces sufficiently accurate long-horizon predictions of reward, policy, and value to support effective planning even when the true dynamics are unknown and high-dimensional."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"MuZero matches or exceeds AlphaZero-level performance in Go, Chess, Shogi and sets a new state of the art on 57 Atari games by learning a model that directly supports planning rather than reconstructing full environment dynamics."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning."}],"snapshot_sha256":"fc5d4d70e58143e2eb08613abb5d8fb5234c20e7280a7423dcc820c749673d35"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"01cc8924ba4b674bba76f2d749503c125b742dbd28c604fa0de38be27d3fb9d8"},"paper":{"abstract_excerpt":"Constructing agents with planning capabilities has long been one of the main challenges in the pursuit of artificial intelligence. Tree-based planning methods have enjoyed huge success in challenging domains, such as chess and Go, where a perfect simulator is available. However, in real-world problems the dynamics governing the environment are often complex and unknown. In this work we present the MuZero algorithm which, by combining a tree-based search with a learned model, achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their u","authors_text":"Arthur Guez, David Silver, Demis Hassabis, Edward Lockhart, Ioannis Antonoglou, Julian Schrittwieser, Karen Simonyan, Laurent Sifre, Simon Schmitt, Thomas Hubert, Thore Graepel, Timothy Lillicrap","cross_cats":["stat.ML"],"headline":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","title":"Mastering Atari, Go, Chess and Shogi by Planning with a Learned Model"},"references":{"count":53,"internal_anchors":6,"resolved_work":53,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Lipton, and Animashree Anandkumar","work_id":"89d8a872-e25f-4e79-971e-9aad2c2d136a","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"The arcade learning environment: An evaluation platform for general agents","work_id":"dd383516-d2cf-40d5-b95c-99ff0ca6f83d","year":2013},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Superhuman ai for heads-up no-limit poker: Libratus beats top profes- sionals","work_id":"2356edfc-3c56-477c-848e-709319c2218b","year":2018},{"cited_arxiv_id":"1802.03006","doi":"","is_internal_anchor":true,"ref_index":4,"title":"Learning and Querying Fast Generative Models for Reinforcement Learning","work_id":"45700551-6f99-4914-b123-083e4ac20e0a","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Joseph Hoane, Jr., and Feng-hsiung Hsu","work_id":"313124b1-b65d-4318-85e2-cb28a84a6476","year":2002}],"snapshot_sha256":"fd65d6b50c28d5f2436bc2689d847b03db653ada1b97a74fc8a57871844fecde"},"source":{"id":"1911.08265","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T23:52:41.366359Z","id":"d05ceb39-5633-41b4-9a1c-ba680c1e5d23","model_set":{"reader":"grok-4.3"},"one_line_summary":"MuZero matches or exceeds AlphaZero-level performance in Go, Chess, Shogi and sets a new state of the art on 57 Atari games by learning a model that directly supports planning rather than reconstructing full environment dynamics.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"MuZero achieves superhuman performance in Atari, Go, chess and shogi by learning a model that predicts only the reward, policy and value needed for planning.","strongest_claim":"MuZero achieves superhuman performance in a range of challenging and visually complex domains, without any knowledge of their underlying dynamics.","weakest_assumption":"That the learned model, when applied iteratively inside tree search, produces sufficiently accurate long-horizon predictions of reward, policy, and value to support effective planning even when the true dynamics are unknown and high-dimensional."}},"verdict_id":"d05ceb39-5633-41b4-9a1c-ba680c1e5d23"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3d94405646cfe71fa1cfab56f667705c9df9ac3dbc2898e4def7064852b76f54","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c17ad5bd13adb1c24b6da393fcd423d7782deb1152195b6ec0860f51eaf91b91","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-11-19T13:58:52Z","title_canon_sha256":"e1b6e9a101ccbe0c56a2ef0ef7c6625e447d26efc29afa6fac4b14d44e852264"},"schema_version":"1.0","source":{"id":"1911.08265","kind":"arxiv","version":2}},"canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"865ed5922d3014d0d068d69e84fff32caf82ec05d3984e27e9a7f6d3678b1b63","first_computed_at":"2026-05-17T23:38:46.177763Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.177763Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+EMevCl0ypFTcszIMVW0TtD9iSewfXpt/Q6rCryVm17+Q7ZxdTrG9hmcEIZjbYb4Z7Yg6eNzVvpMCw6q6feTCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.178211Z","signed_message":"canonical_sha256_bytes"},"source_id":"1911.08265","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3d94405646cfe71fa1cfab56f667705c9df9ac3dbc2898e4def7064852b76f54","sha256:bb0780c7d5b68eb778a310ca7985fe5eac2121a9b8cd55cf0ac9dc310b4df1a3"],"state_sha256":"c32c8ed2a75c3b871b6bd9ba9b371928a77ed1b0104977ef114e8878d9ad4151"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"z9Rex3SKCpqH7hMonnpqyk/TWfXR5LrujKJ9JXNh0L69ed099MtH+f5NkKkBeR1bLzILOZ8M1DWfwNy5SxDyDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-20T23:01:07.536226Z","bundle_sha256":"63f9f4dfa01539deccf2d3ce228ea3f248f4a5971e00e6f4dcbfd9864653a771"}}