{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:6HZGPRTGGMDE7T4IBRN3JVKRQN","short_pith_number":"pith:6HZGPRTG","canonical_record":{"source":{"id":"1804.06893","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T19:49:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"80416c3ebc9c8247c4e149d003dab3f77acc81e292a075871bdd3b79f3660017","abstract_canon_sha256":"45872a0b4cf4b9d50411ef36ef58f6f38d843fe9c7c560100c83b9828e811b5e"},"schema_version":"1.0"},"canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","source":{"kind":"arxiv","id":"1804.06893","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.06893","created_at":"2026-05-18T00:17:59Z"},{"alias_kind":"arxiv_version","alias_value":"1804.06893v2","created_at":"2026-05-18T00:17:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.06893","created_at":"2026-05-18T00:17:59Z"},{"alias_kind":"pith_short_12","alias_value":"6HZGPRTGGMDE","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_16","alias_value":"6HZGPRTGGMDE7T4I","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_8","alias_value":"6HZGPRTG","created_at":"2026-05-18T12:32:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:6HZGPRTGGMDE7T4IBRN3JVKRQN","target":"record","payload":{"canonical_record":{"source":{"id":"1804.06893","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T19:49:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"80416c3ebc9c8247c4e149d003dab3f77acc81e292a075871bdd3b79f3660017","abstract_canon_sha256":"45872a0b4cf4b9d50411ef36ef58f6f38d843fe9c7c560100c83b9828e811b5e"},"schema_version":"1.0"},"canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:59.231031Z","signature_b64":"5POoMSLWUF+3AKBpUk6VF7MjkGnuybTDVjIBtsCst1cpE5lgfZo58ylJ6g8tklR6UYO50UFBkuqSp0fwhXFWCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","last_reissued_at":"2026-05-18T00:17:59.230390Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:59.230390Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.06893","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:17:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xA4TU5PDi4bIeiAbUdp2dVddt99PqF+1Ezrd0lpWu3HjMw4Ya4d25NXGnZYV1EgDLuMHnW3TaPXJ8qtOhDytDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T04:07:54.182538Z"},"content_sha256":"a1713553d28c97182738beac635a92d17d217a8d1cfbc32a70be4d98f5da74a7","schema_version":"1.0","event_id":"sha256:a1713553d28c97182738beac635a92d17d217a8d1cfbc32a70be4d98f5da74a7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:6HZGPRTGGMDE7T4IBRN3JVKRQN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Study on Overfitting in Deep Reinforcement Learning","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chiyuan Zhang, Oriol Vinyals, Remi Munos, Samy Bengio","submitted_at":"2018-04-18T19:49:13Z","abstract_excerpt":"Recent years have witnessed significant progresses in deep Reinforcement Learning (RL). Empowered with large scale neural networks, carefully designed architectures, novel training algorithms and massively parallel computing devices, researchers are able to attack many challenging RL problems. However, in machine learning, more training power comes with a potential risk of more overfitting. As deep RL techniques are being applied to critical problems such as healthcare and finance, it is important to understand the generalization behaviors of the trained agents. In this paper, we conduct a sys"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.06893","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:17:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RvnBTrKwCA44jQtjuDVupLEkqHCwW2IHe2QasLEacc1JdJRUD6+1rrOtNdId9sV6X5HVZvsdmG9HnArvpXvhBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T04:07:54.182925Z"},"content_sha256":"7c76b8fd6cb8f897af2786a152f6883239075ac65328bd8935abdd46052a6137","schema_version":"1.0","event_id":"sha256:7c76b8fd6cb8f897af2786a152f6883239075ac65328bd8935abdd46052a6137"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/bundle.json","state_url":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T04:07:54Z","links":{"resolver":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN","bundle":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/bundle.json","state":"https://pith.science/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6HZGPRTGGMDE7T4IBRN3JVKRQN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:6HZGPRTGGMDE7T4IBRN3JVKRQN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"45872a0b4cf4b9d50411ef36ef58f6f38d843fe9c7c560100c83b9828e811b5e","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T19:49:13Z","title_canon_sha256":"80416c3ebc9c8247c4e149d003dab3f77acc81e292a075871bdd3b79f3660017"},"schema_version":"1.0","source":{"id":"1804.06893","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.06893","created_at":"2026-05-18T00:17:59Z"},{"alias_kind":"arxiv_version","alias_value":"1804.06893v2","created_at":"2026-05-18T00:17:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.06893","created_at":"2026-05-18T00:17:59Z"},{"alias_kind":"pith_short_12","alias_value":"6HZGPRTGGMDE","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_16","alias_value":"6HZGPRTGGMDE7T4I","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_8","alias_value":"6HZGPRTG","created_at":"2026-05-18T12:32:08Z"}],"graph_snapshots":[{"event_id":"sha256:7c76b8fd6cb8f897af2786a152f6883239075ac65328bd8935abdd46052a6137","target":"graph","created_at":"2026-05-18T00:17:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recent years have witnessed significant progresses in deep Reinforcement Learning (RL). Empowered with large scale neural networks, carefully designed architectures, novel training algorithms and massively parallel computing devices, researchers are able to attack many challenging RL problems. However, in machine learning, more training power comes with a potential risk of more overfitting. As deep RL techniques are being applied to critical problems such as healthcare and finance, it is important to understand the generalization behaviors of the trained agents. In this paper, we conduct a sys","authors_text":"Chiyuan Zhang, Oriol Vinyals, Remi Munos, Samy Bengio","cross_cats":["stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T19:49:13Z","title":"A Study on Overfitting in Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.06893","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a1713553d28c97182738beac635a92d17d217a8d1cfbc32a70be4d98f5da74a7","target":"record","created_at":"2026-05-18T00:17:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"45872a0b4cf4b9d50411ef36ef58f6f38d843fe9c7c560100c83b9828e811b5e","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T19:49:13Z","title_canon_sha256":"80416c3ebc9c8247c4e149d003dab3f77acc81e292a075871bdd3b79f3660017"},"schema_version":"1.0","source":{"id":"1804.06893","kind":"arxiv","version":2}},"canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f1f267c66633064fcf880c5bb4d5518362842ff7287b23169af0a866a72a428e","first_computed_at":"2026-05-18T00:17:59.230390Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:17:59.230390Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5POoMSLWUF+3AKBpUk6VF7MjkGnuybTDVjIBtsCst1cpE5lgfZo58ylJ6g8tklR6UYO50UFBkuqSp0fwhXFWCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:17:59.231031Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.06893","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a1713553d28c97182738beac635a92d17d217a8d1cfbc32a70be4d98f5da74a7","sha256:7c76b8fd6cb8f897af2786a152f6883239075ac65328bd8935abdd46052a6137"],"state_sha256":"d572d607773a7a93853052cba04f4abb8b3fdc864ded145cfc067f5c20229a6a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YheI5UBQ7EOPoSwcYxdRSDLTrPHxQlhRwbWx704vW2mP+fgjF5iL7OvcmSo7tDiqpUQ4qK+rgPWvDAxamvEiAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T04:07:54.186308Z","bundle_sha256":"dedcd8d240128044965db6e313af9bba80ed5ba910dc04ff909e7443546afb7d"}}