{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:CQ3BKGTDLUN4BXO7XILEQPMECW","short_pith_number":"pith:CQ3BKGTD","schema_version":"1.0","canonical_sha256":"1436151a635d1bc0dddfba16483d8415811ce85e3ae8887bf364a0f6864ed474","source":{"kind":"arxiv","id":"1804.10332","version":2},"attestation_state":"computed","paper":{"title":"Sim-to-Real: Learning Agile Locomotion For Quadruped Robots","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.RO","authors_text":"Atil Iscen, Danijar Hafner, Erwin Coumans, Jie Tan, Steven Bohez, Tingnan Zhang, Vincent Vanhoucke, Yunfei Bai","submitted_at":"2018-04-27T03:42:55Z","abstract_excerpt":"Designing agile locomotion for quadruped robots often requires extensive expertise and tedious manual tuning. In this paper, we present a system to automate this process by leveraging deep reinforcement learning techniques. Our system can learn quadruped locomotion from scratch using simple reward signals. In addition, users can provide an open loop reference to guide the learning process when more control over the learned gait is needed. The control policies are learned in a physics simulator and then deployed on real robots. In robotics, policies trained in simulation often do not transfer t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1804.10332","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-04-27T03:42:55Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"99b1a8dedf87a7ad7589b1f49a45befdc58caad26c3d5872955939448777fdd1","abstract_canon_sha256":"99f3d7b91b95a3eb0cb59e80ede37584a5e8d18ae880e90b1e0af6a16261cce2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:15:46.341017Z","signature_b64":"l01ZXoaOGfkdkALL2BS2K39AzKVfBUiTOL2NRtymxEFF2DxECZOby3f40UuW10cyxBTd+PCAx1ovnm3wJlOyAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1436151a635d1bc0dddfba16483d8415811ce85e3ae8887bf364a0f6864ed474","last_reissued_at":"2026-05-18T00:15:46.340345Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:15:46.340345Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Sim-to-Real: Learning Agile Locomotion For Quadruped Robots","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.RO","authors_text":"Atil Iscen, Danijar Hafner, Erwin Coumans, Jie Tan, Steven Bohez, Tingnan Zhang, Vincent Vanhoucke, Yunfei Bai","submitted_at":"2018-04-27T03:42:55Z","abstract_excerpt":"Designing agile locomotion for quadruped robots often requires extensive expertise and tedious manual tuning. In this paper, we present a system to automate this process by leveraging deep reinforcement learning techniques. Our system can learn quadruped locomotion from scratch using simple reward signals. In addition, users can provide an open loop reference to guide the learning process when more control over the learned gait is needed. The control policies are learned in a physics simulator and then deployed on real robots. In robotics, policies trained in simulation often do not transfer t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.10332","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1804.10332","created_at":"2026-05-18T00:15:46.340461+00:00"},{"alias_kind":"arxiv_version","alias_value":"1804.10332v2","created_at":"2026-05-18T00:15:46.340461+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.10332","created_at":"2026-05-18T00:15:46.340461+00:00"},{"alias_kind":"pith_short_12","alias_value":"CQ3BKGTDLUN4","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_16","alias_value":"CQ3BKGTDLUN4BXO7","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_8","alias_value":"CQ3BKGTD","created_at":"2026-05-18T12:32:16.446611+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":15,"internal_anchor_count":8,"sample":[{"citing_arxiv_id":"1907.04796","citing_title":"Bayesian Optimization in Variational Latent Spaces with Dynamic Compression","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09183","citing_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2509.18964","citing_title":"Central Limit Theorems for Asynchronous Averaged Q-Learning","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2510.03599","citing_title":"Learning to Act Through Contact: A Unified View of Multi-Task Robot Learning","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2603.04531","citing_title":"PTLD: Sim-to-real Privileged Tactile Latent Distillation for Dexterous Manipulation","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2506.07339","citing_title":"Real-Time Execution of Action Chunking Flow Policies","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"1910.07113","citing_title":"Solving Rubik's Cube with a Robot Hand","ref_index":105,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13058","citing_title":"MUJICA: Multi-skill Unified Joint Integration of Control Architecture for Wheeled-Legged Robots","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09183","citing_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","ref_index":34,"is_internal_anchor":false},{"citing_arxiv_id":"2605.09595","citing_title":"Neuromorphic Reinforcement Learning for Quadruped Locomotion Control on Uneven Terrain","ref_index":39,"is_internal_anchor":false},{"citing_arxiv_id":"2005.01643","citing_title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems","ref_index":4,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01716","citing_title":"Towards Real-time Control of a CartPole System on a Quantum Computer","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"2604.10165","citing_title":"MoRI: Mixture of RL and IL Experts for Long-Horizon Manipulation Tasks","ref_index":37,"is_internal_anchor":false},{"citing_arxiv_id":"2604.05828","citing_title":"Precise Aggressive Aerial Maneuvers with Sensorimotor Policies","ref_index":60,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15289","citing_title":"Abstract Sim2Real through Approximate Information States","ref_index":32,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW","json":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW.json","graph_json":"https://pith.science/api/pith-number/CQ3BKGTDLUN4BXO7XILEQPMECW/graph.json","events_json":"https://pith.science/api/pith-number/CQ3BKGTDLUN4BXO7XILEQPMECW/events.json","paper":"https://pith.science/paper/CQ3BKGTD"},"agent_actions":{"view_html":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW","download_json":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW.json","view_paper":"https://pith.science/paper/CQ3BKGTD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1804.10332&json=true","fetch_graph":"https://pith.science/api/pith-number/CQ3BKGTDLUN4BXO7XILEQPMECW/graph.json","fetch_events":"https://pith.science/api/pith-number/CQ3BKGTDLUN4BXO7XILEQPMECW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW/action/storage_attestation","attest_author":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW/action/author_attestation","sign_citation":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW/action/citation_signature","submit_replication":"https://pith.science/pith/CQ3BKGTDLUN4BXO7XILEQPMECW/action/replication_record"}},"created_at":"2026-05-18T00:15:46.340461+00:00","updated_at":"2026-05-18T00:15:46.340461+00:00"}