{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:AU5IKNFXWIS4PZMYUNDJHEDR2J","short_pith_number":"pith:AU5IKNFX","schema_version":"1.0","canonical_sha256":"053a8534b7b225c7e598a346939071d24c1d6131ca1668d34530b08ecdba1ff3","source":{"kind":"arxiv","id":"1712.01815","version":1},"attestation_state":"computed","paper":{"title":"Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Arthur Guez, David Silver, Demis Hassabis, Dharshan Kumaran, Ioannis Antonoglou, Julian Schrittwieser, Karen Simonyan, Laurent Sifre, Marc Lanctot, Matthew Lai, Thomas Hubert, Thore Graepel, Timothy Lillicrap","submitted_at":"2017-12-05T18:45:38Z","abstract_excerpt":"The game of chess is the most widely-studied domain in the history of artificial intelligence. The strongest programs are based on a combination of sophisticated search techniques, domain-specific adaptations, and handcrafted evaluation functions that have been refined by human experts over several decades. In contrast, the AlphaGo Zero program recently achieved superhuman performance in the game of Go, by tabula rasa reinforcement learning from games of self-play. In this paper, we generalise this approach into a single AlphaZero algorithm that can achieve, tabula rasa, superhuman performance"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1712.01815","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-05T18:45:38Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"a80b15bd39d80a5f99e4892ac2cda76c6d7f926b9451c8bba70545844bd9e71c","abstract_canon_sha256":"46eb0d06c1b453dfc244d1e0a7df8ab26ef0b70d52acd4c9b6c068c3fc42e3e1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:28:47.429957Z","signature_b64":"Gcg53BD0AUlgJUHQFs9v+3KoctYNaGabz5W02GvseJVUJcX4jzgx0YVsR4xxzK9+NOtHqiaFfHmLdaBrWVzsAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"053a8534b7b225c7e598a346939071d24c1d6131ca1668d34530b08ecdba1ff3","last_reissued_at":"2026-05-18T00:28:47.429237Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:28:47.429237Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Arthur Guez, David Silver, Demis Hassabis, Dharshan Kumaran, Ioannis Antonoglou, Julian Schrittwieser, Karen Simonyan, Laurent Sifre, Marc Lanctot, Matthew Lai, Thomas Hubert, Thore Graepel, Timothy Lillicrap","submitted_at":"2017-12-05T18:45:38Z","abstract_excerpt":"The game of chess is the most widely-studied domain in the history of artificial intelligence. The strongest programs are based on a combination of sophisticated search techniques, domain-specific adaptations, and handcrafted evaluation functions that have been refined by human experts over several decades. In contrast, the AlphaGo Zero program recently achieved superhuman performance in the game of Go, by tabula rasa reinforcement learning from games of self-play. In this paper, we generalise this approach into a single AlphaZero algorithm that can achieve, tabula rasa, superhuman performance"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.01815","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1712.01815","created_at":"2026-05-18T00:28:47.429356+00:00"},{"alias_kind":"arxiv_version","alias_value":"1712.01815v1","created_at":"2026-05-18T00:28:47.429356+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.01815","created_at":"2026-05-18T00:28:47.429356+00:00"},{"alias_kind":"pith_short_12","alias_value":"AU5IKNFXWIS4","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_16","alias_value":"AU5IKNFXWIS4PZMY","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_8","alias_value":"AU5IKNFX","created_at":"2026-05-18T12:31:08.081275+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":45,"internal_anchor_count":32,"sample":[{"citing_arxiv_id":"1906.09114","citing_title":"Near-optimal Bayesian Solution For Unknown Discrete Markov Decision Process","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"1906.09627","citing_title":"Inductive general game playing","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"1906.10124","citing_title":"On Multi-Agent Learning in Team Sports Games","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"1906.12266","citing_title":"Growing Action Spaces","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.23643","citing_title":"Less Effort, Shorter Proofs: Reinforcement Learning for Security Protocol Analysis in Tamarin","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"1907.06508","citing_title":"General Board Game Playing for Education and Research in Generic AI Game Learning","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2310.02635","citing_title":"Reinforcement Learning with Foundation Priors: Let the Embodied Agent Efficiently Learn on Its Own","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2009.03393","citing_title":"Generative Language Modeling for Automated Theorem Proving","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2503.01804","citing_title":"$\\texttt{SEM-CTRL}$: Semantically Controlled Decoding","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2504.13541","citing_title":"Scalable Multi-Task Learning through Spiking Neural Networks with Adaptive Task-Switching Policy for Intelligent Autonomous Agents","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2507.23773","citing_title":"General Agentic Planning Through Simulative Reasoning with World Models","ref_index":55,"is_internal_anchor":true},{"citing_arxiv_id":"2510.02590","citing_title":"Use the Online Network If You Can: Towards Fast and Stable Reinforcement Learning","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2512.18552","citing_title":"Toward Training Superintelligent Software Agents through Self-Play SWE-RL","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2602.08167","citing_title":"Self-Supervised Bootstrapping of Action-Predictive Embodied Reasoning","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10930","citing_title":"Evaluating the False Trust Engendered by LLM Explanations","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16692","citing_title":"EfficientTDMPC: Improved MPC Objectives for Sample-Efficient Continuous Control","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19235","citing_title":"GAE Falls Short in Imperfect-Information Self-Play Reinforcement Learning","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17037","citing_title":"D$^2$Evo: Dual Difficulty-Aware Self-Evolution for Data-Efficient Reinforcement Learning","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2509.00338","citing_title":"Scalable Option Learning in High-Throughput Environments","ref_index":58,"is_internal_anchor":true},{"citing_arxiv_id":"2511.08717","citing_title":"Optimal control of the future via prospective learning with control","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2503.13377","citing_title":"Time-R1: Post-Training Large Vision Language Model for Temporal Video Grounding","ref_index":47,"is_internal_anchor":true},{"citing_arxiv_id":"2502.03387","citing_title":"LIMO: Less is More for Reasoning","ref_index":89,"is_internal_anchor":true},{"citing_arxiv_id":"2305.14992","citing_title":"Reasoning with Language Model is Planning with World Model","ref_index":130,"is_internal_anchor":true},{"citing_arxiv_id":"2310.04406","citing_title":"Language Agent Tree Search Unifies Reasoning Acting and Planning in Language Models","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2512.13961","citing_title":"Olmo 3","ref_index":4,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J","json":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J.json","graph_json":"https://pith.science/api/pith-number/AU5IKNFXWIS4PZMYUNDJHEDR2J/graph.json","events_json":"https://pith.science/api/pith-number/AU5IKNFXWIS4PZMYUNDJHEDR2J/events.json","paper":"https://pith.science/paper/AU5IKNFX"},"agent_actions":{"view_html":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J","download_json":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J.json","view_paper":"https://pith.science/paper/AU5IKNFX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1712.01815&json=true","fetch_graph":"https://pith.science/api/pith-number/AU5IKNFXWIS4PZMYUNDJHEDR2J/graph.json","fetch_events":"https://pith.science/api/pith-number/AU5IKNFXWIS4PZMYUNDJHEDR2J/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J/action/storage_attestation","attest_author":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J/action/author_attestation","sign_citation":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J/action/citation_signature","submit_replication":"https://pith.science/pith/AU5IKNFXWIS4PZMYUNDJHEDR2J/action/replication_record"}},"created_at":"2026-05-18T00:28:47.429356+00:00","updated_at":"2026-05-18T00:28:47.429356+00:00"}