{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:X6AFUFR5K5NAZKRQFOXEB2NAPG","short_pith_number":"pith:X6AFUFR5","schema_version":"1.0","canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","source":{"kind":"arxiv","id":"1812.11103","version":3},"attestation_state":"computed","paper":{"title":"Learning to Walk via Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aurick Zhou, George Tucker, Jie Tan, Sehoon Ha, Sergey Levine, Tuomas Haarnoja","submitted_at":"2018-12-26T10:07:13Z","abstract_excerpt":"Deep reinforcement learning (deep RL) holds the promise of automating the acquisition of complex controllers that can map sensory inputs directly to low-level actions. In the domain of robotic locomotion, deep RL could enable learning locomotion skills with minimal engineering and without an explicit model of the robot dynamics. Unfortunately, applying deep RL to real-world robotic tasks is exceptionally difficult, primarily due to poor sample complexity and sensitivity to hyperparameters. While hyperparameters can be easily tuned in simulated domains, tuning may be prohibitively expensive on "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.11103","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T10:07:13Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"d8261b8c7589615c4e548e7eca869219b1169efa6bba9a3948c2504a0dc2f909","abstract_canon_sha256":"7ba8b96594b0ee9a086a22b3deae5c71d5105b0d4373fed93ea7469b15e2f28e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:58.533603Z","signature_b64":"5tiRf6329Y7GTMzB8uvun/smbGAm6WWjAma4W70wy+qTke41xdUaQi/M19s9lwrejzalBinJRsLglopnpSq8BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","last_reissued_at":"2026-05-17T23:42:58.532994Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:58.532994Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning to Walk via Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aurick Zhou, George Tucker, Jie Tan, Sehoon Ha, Sergey Levine, Tuomas Haarnoja","submitted_at":"2018-12-26T10:07:13Z","abstract_excerpt":"Deep reinforcement learning (deep RL) holds the promise of automating the acquisition of complex controllers that can map sensory inputs directly to low-level actions. In the domain of robotic locomotion, deep RL could enable learning locomotion skills with minimal engineering and without an explicit model of the robot dynamics. Unfortunately, applying deep RL to real-world robotic tasks is exceptionally difficult, primarily due to poor sample complexity and sensitivity to hyperparameters. While hyperparameters can be easily tuned in simulated domains, tuning may be prohibitively expensive on "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.11103","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.11103","created_at":"2026-05-17T23:42:58.533084+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.11103v3","created_at":"2026-05-17T23:42:58.533084+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.11103","created_at":"2026-05-17T23:42:58.533084+00:00"},{"alias_kind":"pith_short_12","alias_value":"X6AFUFR5K5NA","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_16","alias_value":"X6AFUFR5K5NAZKRQ","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_8","alias_value":"X6AFUFR5","created_at":"2026-05-18T12:33:01.666342+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2603.15759","citing_title":"Simulation Distillation: Pretraining World Models in Simulation for Rapid Real-World Adaptation","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02744","citing_title":"Learning Locomotion on Complex Terrain for Quadrupedal Robots with Foot Position Maps and Stability Rewards","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2605.09595","citing_title":"Neuromorphic Reinforcement Learning for Quadruped Locomotion Control on Uneven Terrain","ref_index":9,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG","json":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG.json","graph_json":"https://pith.science/api/pith-number/X6AFUFR5K5NAZKRQFOXEB2NAPG/graph.json","events_json":"https://pith.science/api/pith-number/X6AFUFR5K5NAZKRQFOXEB2NAPG/events.json","paper":"https://pith.science/paper/X6AFUFR5"},"agent_actions":{"view_html":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG","download_json":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG.json","view_paper":"https://pith.science/paper/X6AFUFR5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.11103&json=true","fetch_graph":"https://pith.science/api/pith-number/X6AFUFR5K5NAZKRQFOXEB2NAPG/graph.json","fetch_events":"https://pith.science/api/pith-number/X6AFUFR5K5NAZKRQFOXEB2NAPG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/action/storage_attestation","attest_author":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/action/author_attestation","sign_citation":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/action/citation_signature","submit_replication":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/action/replication_record"}},"created_at":"2026-05-17T23:42:58.533084+00:00","updated_at":"2026-05-17T23:42:58.533084+00:00"}