{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:PYEA5EAQV7RR62H5AHPO2WAUMQ","short_pith_number":"pith:PYEA5EAQ","schema_version":"1.0","canonical_sha256":"7e080e9010afe31f68fd01deed581464086dc1a0c723ab2bf93de20f9fe85a1d","source":{"kind":"arxiv","id":"1903.03176","version":2},"attestation_state":"computed","paper":{"title":"MinAtar: An Atari-Inspired Testbed for Thorough and Reproducible Reinforcement Learning Experiments","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Kenny Young, Tian Tian","submitted_at":"2019-03-07T20:34:36Z","abstract_excerpt":"The Arcade Learning Environment (ALE) is a popular platform for evaluating reinforcement learning agents. Much of the appeal comes from the fact that Atari games demonstrate aspects of competency we expect from an intelligent agent and are not biased toward any particular solution approach. The challenge of the ALE includes (1) the representation learning problem of extracting pertinent information from raw pixels, and (2) the behavioural learning problem of leveraging complex, delayed associations between actions and rewards. Often, the research questions we are interested in pertain more to "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.03176","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-07T20:34:36Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c335f1389960ace7ada8d806147ec66325596e07c8e7a8156b98edf654db018c","abstract_canon_sha256":"1f554c18df2b654744f4273ba9d31345eaec47d87dee18d93ea96e3ec8747ecd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:00.022215Z","signature_b64":"nCRJ8wIF5Z6sgKytxEY5uDDScSQzl6QnPlzeqaElOQ0At7gBsDQjDKbvJfrWa2JasZh+8wetiF/tw8LlcjfdDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7e080e9010afe31f68fd01deed581464086dc1a0c723ab2bf93de20f9fe85a1d","last_reissued_at":"2026-05-17T23:44:00.021569Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:00.021569Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MinAtar: An Atari-Inspired Testbed for Thorough and Reproducible Reinforcement Learning Experiments","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Kenny Young, Tian Tian","submitted_at":"2019-03-07T20:34:36Z","abstract_excerpt":"The Arcade Learning Environment (ALE) is a popular platform for evaluating reinforcement learning agents. Much of the appeal comes from the fact that Atari games demonstrate aspects of competency we expect from an intelligent agent and are not biased toward any particular solution approach. The challenge of the ALE includes (1) the representation learning problem of extracting pertinent information from raw pixels, and (2) the behavioural learning problem of leveraging complex, delayed associations between actions and rewards. Often, the research questions we are interested in pertain more to "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.03176","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.03176","created_at":"2026-05-17T23:44:00.021661+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.03176v2","created_at":"2026-05-17T23:44:00.021661+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.03176","created_at":"2026-05-17T23:44:00.021661+00:00"},{"alias_kind":"pith_short_12","alias_value":"PYEA5EAQV7RR","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_16","alias_value":"PYEA5EAQV7RR62H5","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_8","alias_value":"PYEA5EAQ","created_at":"2026-05-18T12:33:24.271573+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2605.12379","citing_title":"Discrete Flow Matching for Offline-to-Online Reinforcement Learning","ref_index":29,"is_internal_anchor":false},{"citing_arxiv_id":"2407.17032","citing_title":"Gymnasium: A Standard Interface for Reinforcement Learning Environments","ref_index":36,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19033","citing_title":"Intentional Updates for Streaming Reinforcement Learning","ref_index":45,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06764","citing_title":"Revisiting Adam for Streaming Reinforcement Learning","ref_index":38,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04368","citing_title":"Extending Differential Temporal Difference Methods for Episodic Problems","ref_index":11,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ","json":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ.json","graph_json":"https://pith.science/api/pith-number/PYEA5EAQV7RR62H5AHPO2WAUMQ/graph.json","events_json":"https://pith.science/api/pith-number/PYEA5EAQV7RR62H5AHPO2WAUMQ/events.json","paper":"https://pith.science/paper/PYEA5EAQ"},"agent_actions":{"view_html":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ","download_json":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ.json","view_paper":"https://pith.science/paper/PYEA5EAQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.03176&json=true","fetch_graph":"https://pith.science/api/pith-number/PYEA5EAQV7RR62H5AHPO2WAUMQ/graph.json","fetch_events":"https://pith.science/api/pith-number/PYEA5EAQV7RR62H5AHPO2WAUMQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ/action/storage_attestation","attest_author":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ/action/author_attestation","sign_citation":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ/action/citation_signature","submit_replication":"https://pith.science/pith/PYEA5EAQV7RR62H5AHPO2WAUMQ/action/replication_record"}},"created_at":"2026-05-17T23:44:00.021661+00:00","updated_at":"2026-05-17T23:44:00.021661+00:00"}