{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:7BIMGNMH6PDMKL75B7YM3RGCYP","short_pith_number":"pith:7BIMGNMH","schema_version":"1.0","canonical_sha256":"f850c33587f3c6c52ffd0ff0cdc4c2c3edb3aaf5c76af0c31a54765e29173834","source":{"kind":"arxiv","id":"1806.10729","version":5},"attestation_state":"computed","paper":{"title":"Illuminating Generalization in Deep Reinforcement Learning through Procedural Level Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ahmed Khalifa, Julian Togelius, Niels Justesen, Philip Bontrager, Ruben Rodriguez Torrado, Sebastian Risi","submitted_at":"2018-06-28T01:16:11Z","abstract_excerpt":"Deep reinforcement learning (RL) has shown impressive results in a variety of domains, learning directly from high-dimensional sensory streams. However, when neural networks are trained in a fixed environment, such as a single level in a video game, they will usually overfit and fail to generalize to new levels. When RL models overfit, even slight modifications to the environment can result in poor agent performance. This paper explores how procedurally generated levels during training can increase generality. We show that for some games procedural level generation enables generalization to ne"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1806.10729","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-28T01:16:11Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"9843b21eb3500d780249a9f679f4f4372c9959c237079643d97b958e409eb082","abstract_canon_sha256":"791b05255c4de7cf675b1a0006d0c96ba4f88c5a5ee0dd5d47017411463753d1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:38.625965Z","signature_b64":"mCnJ5pPY034sWMT2mKsIijmhp03/sTlpPxD85fnuijefszvzNzQD/lNrjOCHG905tuTz4O8RK5LHYAEYJklMDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f850c33587f3c6c52ffd0ff0cdc4c2c3edb3aaf5c76af0c31a54765e29173834","last_reissued_at":"2026-05-17T23:59:38.625183Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:38.625183Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Illuminating Generalization in Deep Reinforcement Learning through Procedural Level Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ahmed Khalifa, Julian Togelius, Niels Justesen, Philip Bontrager, Ruben Rodriguez Torrado, Sebastian Risi","submitted_at":"2018-06-28T01:16:11Z","abstract_excerpt":"Deep reinforcement learning (RL) has shown impressive results in a variety of domains, learning directly from high-dimensional sensory streams. However, when neural networks are trained in a fixed environment, such as a single level in a video game, they will usually overfit and fail to generalize to new levels. When RL models overfit, even slight modifications to the environment can result in poor agent performance. This paper explores how procedurally generated levels during training can increase generality. We show that for some games procedural level generation enables generalization to ne"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.10729","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.10729","created_at":"2026-05-17T23:59:38.625295+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.10729v5","created_at":"2026-05-17T23:59:38.625295+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.10729","created_at":"2026-05-17T23:59:38.625295+00:00"},{"alias_kind":"pith_short_12","alias_value":"7BIMGNMH6PDM","created_at":"2026-05-18T12:32:11.075285+00:00"},{"alias_kind":"pith_short_16","alias_value":"7BIMGNMH6PDMKL75","created_at":"2026-05-18T12:32:11.075285+00:00"},{"alias_kind":"pith_short_8","alias_value":"7BIMGNMH","created_at":"2026-05-18T12:32:11.075285+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1907.02548","citing_title":"Procedural Generation of Initial States of Sokoban","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18591","citing_title":"Randomized Advantage Transformation (RAT): Computing Natural Policy Gradients via Direct Backpropagation","ref_index":62,"is_internal_anchor":true},{"citing_arxiv_id":"1911.01547","citing_title":"On the Measure of Intelligence","ref_index":50,"is_internal_anchor":false},{"citing_arxiv_id":"2309.07864","citing_title":"The Rise and Potential of Large Language Model Based Agents: A Survey","ref_index":75,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP","json":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP.json","graph_json":"https://pith.science/api/pith-number/7BIMGNMH6PDMKL75B7YM3RGCYP/graph.json","events_json":"https://pith.science/api/pith-number/7BIMGNMH6PDMKL75B7YM3RGCYP/events.json","paper":"https://pith.science/paper/7BIMGNMH"},"agent_actions":{"view_html":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP","download_json":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP.json","view_paper":"https://pith.science/paper/7BIMGNMH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.10729&json=true","fetch_graph":"https://pith.science/api/pith-number/7BIMGNMH6PDMKL75B7YM3RGCYP/graph.json","fetch_events":"https://pith.science/api/pith-number/7BIMGNMH6PDMKL75B7YM3RGCYP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP/action/storage_attestation","attest_author":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP/action/author_attestation","sign_citation":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP/action/citation_signature","submit_replication":"https://pith.science/pith/7BIMGNMH6PDMKL75B7YM3RGCYP/action/replication_record"}},"created_at":"2026-05-17T23:59:38.625295+00:00","updated_at":"2026-05-17T23:59:38.625295+00:00"}