{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:T2T3YSUBUCAN44YWONBNR45Q5I","short_pith_number":"pith:T2T3YSUB","schema_version":"1.0","canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","source":{"kind":"arxiv","id":"1810.12894","version":1},"attestation_state":"computed","paper":{"title":"Exploration by Random Network Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Amos Storkey, Harrison Edwards, Oleg Klimov, Yuri Burda","submitted_at":"2018-10-30T17:44:42Z","abstract_excerpt":"We introduce an exploration bonus for deep reinforcement learning methods that is easy to implement and adds minimal overhead to the computation performed. The bonus is the error of a neural network predicting features of the observations given by a fixed randomly initialized neural network. We also introduce a method to flexibly combine intrinsic and extrinsic rewards. We find that the random network distillation (RND) bonus combined with this increased flexibility enables significant progress on several hard exploration Atari games. In particular we establish state of the art performance on "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.12894","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-30T17:44:42Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1ab08f6ae7f04c37cf32cf0f21e35a01ad7a96be3077cfa08bdb748f2fa92de3","abstract_canon_sha256":"2f694bab426974257335b0bdaaf4903335e3d31a07e1da401c50d60cb7f00b29"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:53.903424Z","signature_b64":"FUJGDkoWsJth2jOPUtVRnFVRI0I4kOna7wlWv0lfQeiDA56+Ty3Py3BNK/dfTFnWCKLC+Drq1qLnaILVqB05DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","last_reissued_at":"2026-05-18T00:01:53.902962Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:53.902962Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Exploration by Random Network Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Amos Storkey, Harrison Edwards, Oleg Klimov, Yuri Burda","submitted_at":"2018-10-30T17:44:42Z","abstract_excerpt":"We introduce an exploration bonus for deep reinforcement learning methods that is easy to implement and adds minimal overhead to the computation performed. The bonus is the error of a neural network predicting features of the observations given by a fixed randomly initialized neural network. We also introduce a method to flexibly combine intrinsic and extrinsic rewards. We find that the random network distillation (RND) bonus combined with this increased flexibility enables significant progress on several hard exploration Atari games. In particular we establish state of the art performance on "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.12894","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.12894","created_at":"2026-05-18T00:01:53.903030+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.12894v1","created_at":"2026-05-18T00:01:53.903030+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.12894","created_at":"2026-05-18T00:01:53.903030+00:00"},{"alias_kind":"pith_short_12","alias_value":"T2T3YSUBUCAN","created_at":"2026-05-18T12:32:53.628368+00:00"},{"alias_kind":"pith_short_16","alias_value":"T2T3YSUBUCAN44YW","created_at":"2026-05-18T12:32:53.628368+00:00"},{"alias_kind":"pith_short_8","alias_value":"T2T3YSUB","created_at":"2026-05-18T12:32:53.628368+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":27,"internal_anchor_count":15,"sample":[{"citing_arxiv_id":"2605.23551","citing_title":"Goal-Conditioned Agents that Learn Everything All at Once","ref_index":82,"is_internal_anchor":true},{"citing_arxiv_id":"2412.08812","citing_title":"Test-Time Alignment via Hypothesis Reweighting","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2504.06355","citing_title":"An Information-Geometric Approach to Artificial Curiosity","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2506.14648","citing_title":"SENIOR: Efficient Query Selection and Preference-Guided Exploration in Preference-based Reinforcement Learning","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16395","citing_title":"OrbiSim: World Models as Differentiable Physics Engines for Embodied Intelligence","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20061","citing_title":"Rewarding Beliefs, Not Actions: Consistency-Guided Credit Assignment for Long-Horizon Agents","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19461","citing_title":"Beyond Mode Collapse: Distribution Matching for Diverse Reasoning","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16725","citing_title":"Baba in Wonderland: Online Self-Supervised Dynamics Discovery for Executable World Models","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17017","citing_title":"When Dynamics Shift, Robust Task Inference Wins: Offline Imitation Learning with Behavior Foundation Models Revisited","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2509.25438","citing_title":"Beyond Noisy-TVs: Noise-Robust Exploration Via Learning Progress Monitoring","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2408.00724","citing_title":"Inference Scaling Laws: An Empirical Analysis of Compute-Optimal Inference for Problem-Solving with Language Models","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"1910.01708","citing_title":"Benchmarking Batch Deep Reinforcement Learning Algorithms","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"1910.11215","citing_title":"RoboNet: Large-Scale Multi-Robot Learning","ref_index":56,"is_internal_anchor":true},{"citing_arxiv_id":"2309.16797","citing_title":"Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution","ref_index":56,"is_internal_anchor":true},{"citing_arxiv_id":"1910.07113","citing_title":"Solving Rubik's Cube with a Robot Hand","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11688","citing_title":"Shaping Zero-Shot Coordination via State Blocking","ref_index":37,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12084","citing_title":"Learning What Matters: Adaptive Information-Theoretic Objectives for Robot Exploration","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"1912.06680","citing_title":"Dota 2 with Large Scale Deep Reinforcement Learning","ref_index":44,"is_internal_anchor":false},{"citing_arxiv_id":"2604.26095","citing_title":"Distill-Belief: Closed-Loop Inverse Source Localization and Characterization in Physical Fields","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.25496","citing_title":"Improving Zero-Shot Offline RL via Behavioral Task Sampling","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03413","citing_title":"Learning to Theorize the World from Observation","ref_index":60,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01242","citing_title":"Breaking the Computational Barrier: Provably Efficient Actor-Critic for Low-Rank MDPs","ref_index":60,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01862","citing_title":"QHyer: Q-conditioned Hybrid Attention-mamba Transformer for Offline Goal-conditioned RL","ref_index":19,"is_internal_anchor":false},{"citing_arxiv_id":"2604.16509","citing_title":"Learning-Based Sparsification of Dynamic Graphs in Robotic Exploration Algorithms","ref_index":25,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15414","citing_title":"Beyond Single-Model Optimization: Preserving Plasticity in Continual Reinforcement Learning","ref_index":2,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I","json":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I.json","graph_json":"https://pith.science/api/pith-number/T2T3YSUBUCAN44YWONBNR45Q5I/graph.json","events_json":"https://pith.science/api/pith-number/T2T3YSUBUCAN44YWONBNR45Q5I/events.json","paper":"https://pith.science/paper/T2T3YSUB"},"agent_actions":{"view_html":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I","download_json":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I.json","view_paper":"https://pith.science/paper/T2T3YSUB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.12894&json=true","fetch_graph":"https://pith.science/api/pith-number/T2T3YSUBUCAN44YWONBNR45Q5I/graph.json","fetch_events":"https://pith.science/api/pith-number/T2T3YSUBUCAN44YWONBNR45Q5I/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/action/timestamp_anchor","attest_storage":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/action/storage_attestation","attest_author":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/action/author_attestation","sign_citation":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/action/citation_signature","submit_replication":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/action/replication_record"}},"created_at":"2026-05-18T00:01:53.903030+00:00","updated_at":"2026-05-18T00:01:53.903030+00:00"}