{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:RFTTTYSBSTXXTT3DZARWRELJNB","short_pith_number":"pith:RFTTTYSB","schema_version":"1.0","canonical_sha256":"896739e24194ef79cf63c8236891696873bce19527a645088eedaa8b6da231ff","source":{"kind":"arxiv","id":"1812.06110","version":1},"attestation_state":"computed","paper":{"title":"Dopamine: A Research Framework for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Carles Gelada, Marc G. Bellemare, Pablo Samuel Castro, Saurabh Kumar, Subhodeep Moitra","submitted_at":"2018-12-14T19:03:38Z","abstract_excerpt":"Deep reinforcement learning (deep RL) research has grown significantly in recent years. A number of software offerings now exist that provide stable, comprehensive implementations for benchmarking. At the same time, recent deep RL research has become more diverse in its goals. In this paper we introduce Dopamine, a new research framework for deep RL that aims to support some of that diversity. Dopamine is open-source, TensorFlow-based, and provides compact and reliable implementations of some state-of-the-art deep RL agents. We complement this offering with a taxonomy of the different research"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.06110","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-14T19:03:38Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ae6fd518333a140d1e0fe640c06846572ad0fcb6cd967215036c20f9c7192161","abstract_canon_sha256":"79d590aa6b4f291335961c819cb814cb788e2c6115525745e91d23ed8153c015"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:58:13.136543Z","signature_b64":"4MH9u3Pleh+6LUQTrkj3KO9X+RsWdoSXYc0CgltMQAxDaPy4iikyePTe6BQ97dmoSCwmuFInpKfbtxeMHdaOAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"896739e24194ef79cf63c8236891696873bce19527a645088eedaa8b6da231ff","last_reissued_at":"2026-05-17T23:58:13.135750Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:58:13.135750Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Dopamine: A Research Framework for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Carles Gelada, Marc G. Bellemare, Pablo Samuel Castro, Saurabh Kumar, Subhodeep Moitra","submitted_at":"2018-12-14T19:03:38Z","abstract_excerpt":"Deep reinforcement learning (deep RL) research has grown significantly in recent years. A number of software offerings now exist that provide stable, comprehensive implementations for benchmarking. At the same time, recent deep RL research has become more diverse in its goals. In this paper we introduce Dopamine, a new research framework for deep RL that aims to support some of that diversity. Dopamine is open-source, TensorFlow-based, and provides compact and reliable implementations of some state-of-the-art deep RL agents. We complement this offering with a taxonomy of the different research"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.06110","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.06110","created_at":"2026-05-17T23:58:13.135897+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.06110v1","created_at":"2026-05-17T23:58:13.135897+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.06110","created_at":"2026-05-17T23:58:13.135897+00:00"},{"alias_kind":"pith_short_12","alias_value":"RFTTTYSBSTXX","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_16","alias_value":"RFTTTYSBSTXXTT3D","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_8","alias_value":"RFTTTYSB","created_at":"2026-05-18T12:32:50.500415+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2411.04832","citing_title":"Plasticity Loss in Deep Reinforcement Learning: A Survey","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2510.02590","citing_title":"Use the Online Network If You Can: Towards Fast and Stable Reinforcement Learning","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"1910.01708","citing_title":"Benchmarking Batch Deep Reinforcement Learning Algorithms","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2010.02193","citing_title":"Mastering Atari with Discrete World Models","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08182","citing_title":"Quantile Geometry Regularization for Distributional Reinforcement Learning","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"1912.01603","citing_title":"Dream to Control: Learning Behaviors by Latent Imagination","ref_index":8,"is_internal_anchor":false},{"citing_arxiv_id":"2407.17032","citing_title":"Gymnasium: A Standard Interface for Reinforcement Learning Environments","ref_index":8,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB","json":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB.json","graph_json":"https://pith.science/api/pith-number/RFTTTYSBSTXXTT3DZARWRELJNB/graph.json","events_json":"https://pith.science/api/pith-number/RFTTTYSBSTXXTT3DZARWRELJNB/events.json","paper":"https://pith.science/paper/RFTTTYSB"},"agent_actions":{"view_html":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB","download_json":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB.json","view_paper":"https://pith.science/paper/RFTTTYSB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.06110&json=true","fetch_graph":"https://pith.science/api/pith-number/RFTTTYSBSTXXTT3DZARWRELJNB/graph.json","fetch_events":"https://pith.science/api/pith-number/RFTTTYSBSTXXTT3DZARWRELJNB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB/action/storage_attestation","attest_author":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB/action/author_attestation","sign_citation":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB/action/citation_signature","submit_replication":"https://pith.science/pith/RFTTTYSBSTXXTT3DZARWRELJNB/action/replication_record"}},"created_at":"2026-05-17T23:58:13.135897+00:00","updated_at":"2026-05-17T23:58:13.135897+00:00"}