{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:TDBZ2QUS7YDXM2IQERFHYZFV2J","short_pith_number":"pith:TDBZ2QUS","schema_version":"1.0","canonical_sha256":"98c39d4292fe07766910244a7c64b5d2671c0d1c51796e706a6ff3ea5c369782","source":{"kind":"arxiv","id":"1707.02286","version":2},"attestation_state":"computed","paper":{"title":"Emergence of Locomotion Behaviours in Rich Environments","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"David Silver, Dhruva TB, Greg Wayne, Jay Lemmon, Josh Merel, Martin Riedmiller, Nicolas Heess, S. M. Ali Eslami, Srinivasan Sriram, Tom Erez, Yuval Tassa, Ziyu Wang","submitted_at":"2017-07-07T17:56:57Z","abstract_excerpt":"The reinforcement learning paradigm allows, in principle, for complex behaviours to be learned directly from simple reward signals. In practice, however, it is common to carefully hand-design the reward function to encourage a particular solution, or to derive it from demonstration data. In this paper explore how a rich environment can help to promote the learning of complex behavior. Specifically, we train agents in diverse environmental contexts, and find that this encourages the emergence of robust behaviours that perform well across a suite of tasks. We demonstrate this principle for locom"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.02286","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-07T17:56:57Z","cross_cats_sorted":[],"title_canon_sha256":"35b27057198b8b9a51a75dd41ab6f18625ec3b9cda631719bc0eaf32a5d0ad3d","abstract_canon_sha256":"84c853aa11cc877b74b5f68e779e0799f46d0c82f9b8e63eedcbbf9206c685c3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:40:34.680359Z","signature_b64":"3Pw4+HqvIhFkoZnvBXpWyOVCq9Qzr4tXPdVm0zOXumgubP1UAyKRYQJBmLgPeZPJknDXkOTmcCCdFulrG4gZBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"98c39d4292fe07766910244a7c64b5d2671c0d1c51796e706a6ff3ea5c369782","last_reissued_at":"2026-05-18T00:40:34.679555Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:40:34.679555Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Emergence of Locomotion Behaviours in Rich Environments","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"David Silver, Dhruva TB, Greg Wayne, Jay Lemmon, Josh Merel, Martin Riedmiller, Nicolas Heess, S. M. Ali Eslami, Srinivasan Sriram, Tom Erez, Yuval Tassa, Ziyu Wang","submitted_at":"2017-07-07T17:56:57Z","abstract_excerpt":"The reinforcement learning paradigm allows, in principle, for complex behaviours to be learned directly from simple reward signals. In practice, however, it is common to carefully hand-design the reward function to encourage a particular solution, or to derive it from demonstration data. In this paper explore how a rich environment can help to promote the learning of complex behavior. Specifically, we train agents in diverse environmental contexts, and find that this encourages the emergence of robust behaviours that perform well across a suite of tasks. We demonstrate this principle for locom"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.02286","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.02286","created_at":"2026-05-18T00:40:34.679653+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.02286v2","created_at":"2026-05-18T00:40:34.679653+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.02286","created_at":"2026-05-18T00:40:34.679653+00:00"},{"alias_kind":"pith_short_12","alias_value":"TDBZ2QUS7YDX","created_at":"2026-05-18T12:31:46.661854+00:00"},{"alias_kind":"pith_short_16","alias_value":"TDBZ2QUS7YDXM2IQ","created_at":"2026-05-18T12:31:46.661854+00:00"},{"alias_kind":"pith_short_8","alias_value":"TDBZ2QUS","created_at":"2026-05-18T12:31:46.661854+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":10,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"1906.08649","citing_title":"Exploring Model-based Planning with Policy Networks","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"1906.10667","citing_title":"Reinforcement Learning with Competitive Ensembles of Information-Constrained Primitives","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02057","citing_title":"Benchmarking Model-Based Reinforcement Learning","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"1907.09466","citing_title":"An Actor-Critic-Attention Mechanism for Deep Reinforcement Learning in Multi-view Environments","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2602.07832","citing_title":"rePIRL: Learn PRM with Inverse RL for LLM Reasoning","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18591","citing_title":"Randomized Advantage Transformation (RAT): Computing Natural Policy Gradients via Direct Backpropagation","ref_index":125,"is_internal_anchor":true},{"citing_arxiv_id":"2309.16797","citing_title":"Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution","ref_index":144,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01833","citing_title":"Remote Action Generation: Remote Control with Minimal Communication","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"1910.00177","citing_title":"Advantage-Weighted Regression: Simple and Scalable Off-Policy Reinforcement Learning","ref_index":5,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02320","citing_title":"ANO: A Principled Approach to Robust Policy Optimization","ref_index":9,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J","json":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J.json","graph_json":"https://pith.science/api/pith-number/TDBZ2QUS7YDXM2IQERFHYZFV2J/graph.json","events_json":"https://pith.science/api/pith-number/TDBZ2QUS7YDXM2IQERFHYZFV2J/events.json","paper":"https://pith.science/paper/TDBZ2QUS"},"agent_actions":{"view_html":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J","download_json":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J.json","view_paper":"https://pith.science/paper/TDBZ2QUS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.02286&json=true","fetch_graph":"https://pith.science/api/pith-number/TDBZ2QUS7YDXM2IQERFHYZFV2J/graph.json","fetch_events":"https://pith.science/api/pith-number/TDBZ2QUS7YDXM2IQERFHYZFV2J/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J/action/storage_attestation","attest_author":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J/action/author_attestation","sign_citation":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J/action/citation_signature","submit_replication":"https://pith.science/pith/TDBZ2QUS7YDXM2IQERFHYZFV2J/action/replication_record"}},"created_at":"2026-05-18T00:40:34.679653+00:00","updated_at":"2026-05-18T00:40:34.679653+00:00"}