{"work":{"id":"1ff30834-09de-4b27-9602-0bd5ea024dd0","openalex_id":"https://openalex.org/W2145339207","doi":"10.1038/nature14236","arxiv_id":null,"raw_key":null,"title":"Human-level control through deep reinforcement learning","authors":[{"given":"Volodymyr","family":"Mnih","sequence":"first","affiliation":[]},{"given":"Koray","family":"Kavukcuoglu","sequence":"additional","affiliation":[]},{"given":"David","family":"Silver","sequence":"additional","affiliation":[]},{"given":"Andrei A.","family":"Rusu","sequence":"additional","affiliation":[]},{"given":"Joel","family":"Veness","sequence":"additional","affiliation":[]},{"given":"Marc G.","family":"Bellemare","sequence":"additional","affiliation":[]},{"given":"Alex","family":"Graves","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Riedmiller","sequence":"additional","affiliation":[]},{"given":"Andreas K.","family":"Fidjeland","sequence":"additional","affiliation":[]},{"given":"Georg","family":"Ostrovski","sequence":"additional","affiliation":[]},{"given":"Stig","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"Charles","family":"Beattie","sequence":"additional","affiliation":[]},{"given":"Amir","family":"Sadik","sequence":"additional","affiliation":[]},{"given":"Ioannis","family":"Antonoglou","sequence":"additional","affiliation":[]},{"given":"Helen","family":"King","sequence":"additional","affiliation":[]},{"given":"Dharshan","family":"Kumaran","sequence":"additional","affiliation":[]},{"given":"Daan","family":"Wierstra","sequence":"additional","affiliation":[]},{"given":"Shane","family":"Legg","sequence":"additional","affiliation":[]},{"given":"Demis","family":"Hassabis","sequence":"additional","affiliation":[]}],"authors_text":"V","year":2015,"venue":"Nature","abstract":null,"external_url":"https://doi.org/10.1038/nature14236","cited_by_count":22645,"metadata_source":"doi_reference","metadata_fetched_at":"2026-05-25T17:46:05.767979+00:00","pith_arxiv_id":null,"created_at":"2026-05-08T18:44:01.452548+00:00","updated_at":"2026-05-25T17:46:05.767979+00:00","title_quality_ok":true,"display_title":"Rusu, Joel Veness, Marc G","render_title":"Rusu, Joel Veness, Marc G"},"hub":{"state":{"work_id":"1ff30834-09de-4b27-9602-0bd5ea024dd0","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":29,"external_cited_by_count":22645,"distinct_field_count":11,"first_pith_cited_at":"2019-06-24T05:37:58+00:00","last_pith_cited_at":"2026-05-22T12:31:18+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-28T09:58:26.436230+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":4},{"context_role":"method","n":2},{"context_role":"baseline","n":1}],"polarity_counts":[{"context_polarity":"background","n":3},{"context_polarity":"use_method","n":2},{"context_polarity":"baseline","n":1},{"context_polarity":"unclear","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"Rusu, Joel Veness, Marc G","claims":[],"why_cited":"Pith tracks Rusu, Joel Veness, Marc G because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:39:34.444227+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"86d7ce36-293b-4b1d-9886-ff6dc1068818","orcid":null,"display_name":"Volodymyr Mnih"},{"id":"9d8577f6-4451-4e27-a0d2-3e66e22c0c4f","orcid":null,"display_name":"Koray Kavukcuoglu"},{"id":"c6fd3013-f440-4f0f-a73d-da5b40715448","orcid":null,"display_name":"David Silver"},{"id":"8add448b-7e2c-41ff-b581-9a375f6cbc46","orcid":null,"display_name":"Andrei A. Rusu"},{"id":"818a31da-186b-45aa-be91-1c39949e23a4","orcid":null,"display_name":"Joel Veness"},{"id":"7026d45d-b38f-4005-89c8-4b1c13ec99c5","orcid":null,"display_name":"Marc G. Bellemare"},{"id":"d96aa666-85ce-48ea-bd97-e08bc711dd3f","orcid":null,"display_name":"Alex Graves"},{"id":"4c65da08-ae53-4a60-a847-fdaa6481b67b","orcid":null,"display_name":"Martin Riedmiller"},{"id":"e2efc0fa-f410-47f1-89a1-6aa4a8b2230d","orcid":null,"display_name":"Andreas K. Fidjeland"},{"id":"9772b573-60f0-42ef-ad21-dd05e6e5f0de","orcid":null,"display_name":"Georg Ostrovski"},{"id":"ed72766a-376e-4e84-8270-7fbce7fcfde9","orcid":null,"display_name":"Stig Petersen"},{"id":"feb2fa51-a335-4f95-95c8-69cf3d4ab43d","orcid":null,"display_name":"Charles Beattie"}]},"error":null,"updated_at":"2026-05-14T18:39:29.456622+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T18:40:00.529115+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":4},{"title":"LeCun, Y","work_id":"f959cefa-9092-49df-9fb5-a4e6654500f1","shared_citers":3},{"title":"Planning and acting in partially observable stochastic domains","work_id":"b3ea0098-466e-4f58-abfe-1641bbce18de","shared_citers":3},{"title":"Q -learning","work_id":"56b15667-338b-4f66-8bf6-e2c1e429cbbb","shared_citers":3},{"title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","work_id":"6674e5db-4e1c-49c0-b598-c108a0ecadb6","shared_citers":3},{"title":"Weihao Tan, Ziluo Ding, Wentao Zhang, Boyu Li, Bohan Zhou, Junpeng Yue, Haochong Xia, Jiechuan Jiang, Longtao Zheng, Xinrun Xu, Yifei Bi, Pengjie Gu, Xinrun Wang, B ¨orje F","work_id":"3f16fe58-d421-46e9-adaa-c38090223a36","shared_citers":3},{"title":"Asynchronous methods for deep reinforcement learning.arXiv preprint arXiv:1602.01783","work_id":"b54058f9-0b3b-46ae-a9a0-f22748625e2a","shared_citers":2},{"title":"AWAC: Accelerating Online Reinforcement Learning with Offline Datasets","work_id":"f0a11265-1acf-4ffc-a822-08bd04b6bddf","shared_citers":2},{"title":"Compute-optimal scaling for value-based deep rl","work_id":"b5c1a21a-c316-4024-8ce9-c6108e5b5c50","shared_citers":2},{"title":"Efﬁcient online reinforcement learning with ofﬂine data","work_id":"dd85f6f4-3190-4462-afb4-4f6a12115555","shared_citers":2},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":2},{"title":"Learning representations by back-propagating errors.Nature1986,323, 533–536","work_id":"2d37f26a-0175-4c28-ab1f-1d76fc2b1082","shared_citers":2},{"title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning.CoRR, abs/1910.10897","work_id":"42b3081e-43bd-451c-b777-259c81be7953","shared_citers":2},{"title":"Offline Reinforcement Learning with Implicit Q-Learning","work_id":"4adca4ff-8975-49b3-aee4-2ef7e0f95275","shared_citers":2},{"title":"Playing Atari with Deep Reinforcement Learning","work_id":"736a8ddf-e365-4940-ad58-4699fddedb86","shared_citers":2},{"title":"PonderNet: Learning to ponder.arXiv preprint arXiv:2106.01345","work_id":"a0295445-c5e8-43f7-95e4-eec56e7c0d07","shared_citers":2},{"title":"Puterman","work_id":"02ff1fe9-a285-487d-a56f-1e5ddd479dd7","shared_citers":2},{"title":"Rainbow: Combining Improvements in Deep Reinforcement Learning , October 2017","work_id":"d0dd9ed8-9406-49f8-aa8c-ffef61ca5e50","shared_citers":2},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":2},{"title":"Stabilizing oﬀ-policy q-learning via bootstrapping error reduction.arXiv preprint arXiv:1906.00949","work_id":"5245a2f6-b3e1-4edb-8d34-fb132fc9ccdc","shared_citers":2},{"title":"Tarasov, V","work_id":"08ad3e5a-4806-4a75-b300-70d46dafd1a0","shared_citers":2},{"title":"Williams","work_id":"469b3b81-55f9-4542-9dd7-570a63cfda74","shared_citers":2},{"title":"Ziegler, Ryan Lowe, Chelsea V oss, Alec Radford, Dario Amodei, and Paul Christiano","work_id":"1fae7759-93bb-4cba-a0d5-ebff515b9d39","shared_citers":2},{"title":"$\\pi_{0.5}$: a Vision-Language-Action Model with Open-World Generalization","work_id":"d1ad7304-d09a-49bc-809e-846439f6aff9","shared_citers":1}],"time_series":[{"n":1,"year":2019},{"n":1,"year":2024},{"n":12,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T18:39:39.082794+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T18:39:34.275212+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"Rusu, Joel Veness, Marc G","claims":[],"why_cited":"Pith tracks Rusu, Joel Veness, Marc G because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:40:04.544920+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Rusu, Joel Veness, Marc G","claims":[],"why_cited":"Pith tracks Rusu, Joel Veness, Marc G because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:39:56.642613+00:00"}},"summary":{"title":"Rusu, Joel Veness, Marc G","claims":[],"why_cited":"Pith tracks Rusu, Joel Veness, Marc G because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":4},{"title":"LeCun, Y","work_id":"f959cefa-9092-49df-9fb5-a4e6654500f1","shared_citers":3},{"title":"Planning and acting in partially observable stochastic domains","work_id":"b3ea0098-466e-4f58-abfe-1641bbce18de","shared_citers":3},{"title":"Q -learning","work_id":"56b15667-338b-4f66-8bf6-e2c1e429cbbb","shared_citers":3},{"title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","work_id":"6674e5db-4e1c-49c0-b598-c108a0ecadb6","shared_citers":3},{"title":"Weihao Tan, Ziluo Ding, Wentao Zhang, Boyu Li, Bohan Zhou, Junpeng Yue, Haochong Xia, Jiechuan Jiang, Longtao Zheng, Xinrun Xu, Yifei Bi, Pengjie Gu, Xinrun Wang, B ¨orje F","work_id":"3f16fe58-d421-46e9-adaa-c38090223a36","shared_citers":3},{"title":"Asynchronous methods for deep reinforcement learning.arXiv preprint arXiv:1602.01783","work_id":"b54058f9-0b3b-46ae-a9a0-f22748625e2a","shared_citers":2},{"title":"AWAC: Accelerating Online Reinforcement Learning with Offline Datasets","work_id":"f0a11265-1acf-4ffc-a822-08bd04b6bddf","shared_citers":2},{"title":"Compute-optimal scaling for value-based deep rl","work_id":"b5c1a21a-c316-4024-8ce9-c6108e5b5c50","shared_citers":2},{"title":"Efﬁcient online reinforcement learning with ofﬂine data","work_id":"dd85f6f4-3190-4462-afb4-4f6a12115555","shared_citers":2},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":2},{"title":"Learning representations by back-propagating errors.Nature1986,323, 533–536","work_id":"2d37f26a-0175-4c28-ab1f-1d76fc2b1082","shared_citers":2},{"title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning.CoRR, abs/1910.10897","work_id":"42b3081e-43bd-451c-b777-259c81be7953","shared_citers":2},{"title":"Offline Reinforcement Learning with Implicit Q-Learning","work_id":"4adca4ff-8975-49b3-aee4-2ef7e0f95275","shared_citers":2},{"title":"Playing Atari with Deep Reinforcement Learning","work_id":"736a8ddf-e365-4940-ad58-4699fddedb86","shared_citers":2},{"title":"PonderNet: Learning to ponder.arXiv preprint arXiv:2106.01345","work_id":"a0295445-c5e8-43f7-95e4-eec56e7c0d07","shared_citers":2},{"title":"Puterman","work_id":"02ff1fe9-a285-487d-a56f-1e5ddd479dd7","shared_citers":2},{"title":"Rainbow: Combining Improvements in Deep Reinforcement Learning , October 2017","work_id":"d0dd9ed8-9406-49f8-aa8c-ffef61ca5e50","shared_citers":2},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":2},{"title":"Stabilizing oﬀ-policy q-learning via bootstrapping error reduction.arXiv preprint arXiv:1906.00949","work_id":"5245a2f6-b3e1-4edb-8d34-fb132fc9ccdc","shared_citers":2},{"title":"Tarasov, V","work_id":"08ad3e5a-4806-4a75-b300-70d46dafd1a0","shared_citers":2},{"title":"Williams","work_id":"469b3b81-55f9-4542-9dd7-570a63cfda74","shared_citers":2},{"title":"Ziegler, Ryan Lowe, Chelsea V oss, Alec Radford, Dario Amodei, and Paul Christiano","work_id":"1fae7759-93bb-4cba-a0d5-ebff515b9d39","shared_citers":2},{"title":"$\\pi_{0.5}$: a Vision-Language-Action Model with Open-World Generalization","work_id":"d1ad7304-d09a-49bc-809e-846439f6aff9","shared_citers":1}],"time_series":[{"n":1,"year":2019},{"n":1,"year":2024},{"n":12,"year":2026}],"dependency_candidates":[]},"authors":[{"id":"d96aa666-85ce-48ea-bd97-e08bc711dd3f","orcid":null,"display_name":"Alex Graves","source":"manual","import_confidence":0.72},{"id":"e2efc0fa-f410-47f1-89a1-6aa4a8b2230d","orcid":null,"display_name":"Andreas K. Fidjeland","source":"manual","import_confidence":0.72},{"id":"8add448b-7e2c-41ff-b581-9a375f6cbc46","orcid":null,"display_name":"Andrei A. Rusu","source":"manual","import_confidence":0.72},{"id":"feb2fa51-a335-4f95-95c8-69cf3d4ab43d","orcid":null,"display_name":"Charles Beattie","source":"manual","import_confidence":0.72},{"id":"c6fd3013-f440-4f0f-a73d-da5b40715448","orcid":null,"display_name":"David Silver","source":"manual","import_confidence":0.72},{"id":"9772b573-60f0-42ef-ad21-dd05e6e5f0de","orcid":null,"display_name":"Georg Ostrovski","source":"manual","import_confidence":0.72},{"id":"818a31da-186b-45aa-be91-1c39949e23a4","orcid":null,"display_name":"Joel Veness","source":"manual","import_confidence":0.72},{"id":"9d8577f6-4451-4e27-a0d2-3e66e22c0c4f","orcid":null,"display_name":"Koray Kavukcuoglu","source":"manual","import_confidence":0.72},{"id":"7026d45d-b38f-4005-89c8-4b1c13ec99c5","orcid":null,"display_name":"Marc G. Bellemare","source":"manual","import_confidence":0.72},{"id":"4c65da08-ae53-4a60-a847-fdaa6481b67b","orcid":null,"display_name":"Martin Riedmiller","source":"manual","import_confidence":0.72},{"id":"ed72766a-376e-4e84-8270-7fbce7fcfde9","orcid":null,"display_name":"Stig Petersen","source":"manual","import_confidence":0.72},{"id":"86d7ce36-293b-4b1d-9886-ff6dc1068818","orcid":null,"display_name":"Volodymyr Mnih","source":"manual","import_confidence":0.72}]}}