{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:6HZGPRTGGMDE7T4IBRN3JVKRQN","short_pith_number":"pith:6HZGPRTG","schema_version":"1.0","canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","source":{"kind":"arxiv","id":"1804.06893","version":2},"attestation_state":"computed","paper":{"title":"A Study on Overfitting in Deep Reinforcement Learning","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chiyuan Zhang, Oriol Vinyals, Remi Munos, Samy Bengio","submitted_at":"2018-04-18T19:49:13Z","abstract_excerpt":"Recent years have witnessed significant progresses in deep Reinforcement Learning (RL). Empowered with large scale neural networks, carefully designed architectures, novel training algorithms and massively parallel computing devices, researchers are able to attack many challenging RL problems. However, in machine learning, more training power comes with a potential risk of more overfitting. As deep RL techniques are being applied to critical problems such as healthcare and finance, it is important to understand the generalization behaviors of the trained agents. In this paper, we conduct a sys"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1804.06893","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T19:49:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"80416c3ebc9c8247c4e149d003dab3f77acc81e292a075871bdd3b79f3660017","abstract_canon_sha256":"45872a0b4cf4b9d50411ef36ef58f6f38d843fe9c7c560100c83b9828e811b5e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:59.231031Z","signature_b64":"5POoMSLWUF+3AKBpUk6VF7MjkGnuybTDVjIBtsCst1cpE5lgfZo58ylJ6g8tklR6UYO50UFBkuqSp0fwhXFWCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","last_reissued_at":"2026-05-18T00:17:59.230390Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:59.230390Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Study on Overfitting in Deep Reinforcement Learning","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chiyuan Zhang, Oriol Vinyals, Remi Munos, Samy Bengio","submitted_at":"2018-04-18T19:49:13Z","abstract_excerpt":"Recent years have witnessed significant progresses in deep Reinforcement Learning (RL). Empowered with large scale neural networks, carefully designed architectures, novel training algorithms and massively parallel computing devices, researchers are able to attack many challenging RL problems. However, in machine learning, more training power comes with a potential risk of more overfitting. As deep RL techniques are being applied to critical problems such as healthcare and finance, it is important to understand the generalization behaviors of the trained agents. In this paper, we conduct a sys"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.06893","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1804.06893","created_at":"2026-05-18T00:17:59.230497+00:00"},{"alias_kind":"arxiv_version","alias_value":"1804.06893v2","created_at":"2026-05-18T00:17:59.230497+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.06893","created_at":"2026-05-18T00:17:59.230497+00:00"},{"alias_kind":"pith_short_12","alias_value":"6HZGPRTGGMDE","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_16","alias_value":"6HZGPRTGGMDE7T4I","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_8","alias_value":"6HZGPRTG","created_at":"2026-05-18T12:32:08.215937+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1907.01475","citing_title":"Generalizing from a few environments in safety-critical reinforcement learning","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18591","citing_title":"Randomized Advantage Transformation (RAT): Computing Natural Policy Gradients via Direct Backpropagation","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10546","citing_title":"Higher Resolution, Better Generalization: Unlocking Visual Scaling in Deep Reinforcement Learning","ref_index":40,"is_internal_anchor":false},{"citing_arxiv_id":"2309.07864","citing_title":"The Rise and Potential of Large Language Model Based Agents: A Survey","ref_index":74,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07057","citing_title":"Integrating Causal DAGs in Deep RL: Activating Minimal Markovian States with Multi-Order Exposure","ref_index":17,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN","json":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN.json","graph_json":"https://pith.science/api/pith-number/6HZGPRTGGMDE7T4IBRN3JVKRQN/graph.json","events_json":"https://pith.science/api/pith-number/6HZGPRTGGMDE7T4IBRN3JVKRQN/events.json","paper":"https://pith.science/paper/6HZGPRTG"},"agent_actions":{"view_html":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN","download_json":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN.json","view_paper":"https://pith.science/paper/6HZGPRTG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1804.06893&json=true","fetch_graph":"https://pith.science/api/pith-number/6HZGPRTGGMDE7T4IBRN3JVKRQN/graph.json","fetch_events":"https://pith.science/api/pith-number/6HZGPRTGGMDE7T4IBRN3JVKRQN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/action/storage_attestation","attest_author":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/action/author_attestation","sign_citation":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/action/citation_signature","submit_replication":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/action/replication_record"}},"created_at":"2026-05-18T00:17:59.230497+00:00","updated_at":"2026-05-18T00:17:59.230497+00:00"}