{"total":18,"items":[{"citing_arxiv_id":"2607.02205","ref_index":16,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Actuator Reality Shaping for Zero-Shot Sim-to-Real Robot Learning","primary_cat":"cs.RO","submitted_at":"2026-07-02T14:12:41+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Actuator reality shaping uses a 2DOF controller to align real actuator closed-loop behavior with idealized simulation reference dynamics, enabling zero-shot sim-to-real policy deployment across multiple robot platforms.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.25179","ref_index":14,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Learning Perceptive Platform Adaptive Locomotion Controllers for Quadrupedal Robots","primary_cat":"cs.RO","submitted_at":"2026-06-23T21:10:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Empirical comparison of blind, critic-perceptive, and fully perceptive variants of morphology-aware RL locomotion controllers shows critic-only perception improves robustness over blind baselines while remaining more stable under perception noise than full perception.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21387","ref_index":14,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Long-Distance Real-World Navigation of the Legged-Wheeled Robot Go2-W Using Deep Reinforcement Learning","primary_cat":"cs.RO","submitted_at":"2026-06-19T12:53:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A DRL locomotion controller extended from prior quadruped work enabled the Go2-W robot to complete 2.8 km of autonomous real-world navigation including mixed terrain and stairs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.20645","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"TACT-ful: Multi-Channel Terrain Affordance and Compliance Training for Payload-Robust Perceptive Humanoid Locomotion","primary_cat":"cs.RO","submitted_at":"2026-06-06T10:25:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A multi-channel terrain affordance reward combined with lower-body compliance training via virtual wrenches enables end-to-end PPO-trained humanoid policies to walk at 1 m/s on 0.2 m risers with improved payload robustness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08059","ref_index":44,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Perceptive Behavior Foundation Model: Adapting Human Motion Priors to Robot-Centric Terrain","primary_cat":"cs.RO","submitted_at":"2026-06-06T08:46:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Perceptive BFM grounds human motion priors in robot terrain perception via terrain-conformal reference synthesis and teacher-student transfer from adapted to raw-reference tracking.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.06944","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"T-GMP: Terrain-conditioned Generative Motion Priors for Versatile and Natural Humanoid Locomotion","primary_cat":"cs.RO","submitted_at":"2026-06-05T06:15:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"T-GMP learns a terrain-conditioned latent motion manifold via CVAE from demonstrations and integrates it into an adversarial pipeline with a foothold penalty for versatile, natural humanoid locomotion.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05143","ref_index":27,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"HORIZON: Recoverability-Governed Curriculum for Physical-Domain Scaling","primary_cat":"cs.RO","submitted_at":"2026-06-03T17:50:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HORIZON is a recoverability-governed checkpointed frontier curriculum for on-policy physical-domain scaling on quadruped locomotion that identifies three regularities: uneven widening, non-monotonic composition, and the necessity of joint on-policy interaction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01332","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"S2M-Trek: From Single to Multi-Sphere Transport via Per-Frame Deep Sets on a Wheel-Legged Robot","primary_cat":"cs.RO","submitted_at":"2026-05-31T16:35:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Per-Frame Deep Sets enables scaling single-sphere to five-sphere transport on a quadruped by performing permutation-invariant pooling within each history frame, reaching 100% no-drop success in simulation where standard encoders plateau.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02636","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Too Much of a Good Thing: When sim2real Efforts Impede Policy Learning (And What to Do About It)","primary_cat":"cs.RO","submitted_at":"2026-05-30T22:17:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Excessive sim2real focus impedes robotics policy learning via simulator lock-in; a kinematics-only sim2sim2real paradigm is proposed to restore exploration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.26478","ref_index":9,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Efficient On-policy Visual-RL via Stochastic Decoupled Policy Gradient","primary_cat":"cs.RO","submitted_at":"2026-05-26T02:35:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SDPG is a new on-policy visual RL algorithm that estimates gradients via stochastic perturbations of rollouts, achieving faster training and lower memory use than baselines on visual MuJoCo tasks while adding new robotics benchmarks and sim-to-real results.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19503","ref_index":17,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ARC-RL: A Reinforcement Learning Playground Inspired by ARC Raiders","primary_cat":"cs.RO","submitted_at":"2026-05-19T07:54:40+00:00","verdict":"ACCEPT","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ARC-RL is a new suite of four MuJoCo continuous-control environments featuring game-inspired hexapod and quadruped morphologies, a single closed-form multi-component reward function, CPG demonstrators, and empirical comparisons of online and offline-to-online RL algorithms.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16520","ref_index":151,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Global Convergence of Sampling-Based Nonconvex Optimization through Diffusion-Style Smoothing","primary_cat":"cs.LG","submitted_at":"2026-05-15T18:14:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Recasts sampling-based nonconvex optimization as smoothed gradient descent to obtain non-asymptotic convergence guarantees and introduces the DIDA annealed algorithm that converges to the global optimum.","context_count":1,"top_context_role":"background","top_context_polarity":"unclear","context_text":"Substituting these relationships from equations(146) and (147) into (148) and (149), and simplifying the exponents oft, we arrive at the final result as stated in equations (112) and (113): E[|∇xg(0)(x;t)−∇xg(x;t)|]≤λd N ( M−1 2 t−1 2 +M 0 L λ+M 1 2p (L λ )1+ 1 p t 1 2p +M 1 2 (L λ )2 t 1 2 ) (150) E[|∇xg(0)(x;t)−∇xg(x;t)|2]≤λ2d N ( V−1t−1+V 0 (L λ )2 +V 1 p (L λ )2+ 2 p t 1 p +V 1 (L λ )4 t ) (151) whereM−1 2 ,M 0,M 1 2 ,M 1 2p ,M 1 2 ,V−1,V 0,V 1,V 1 p are positive constants. 50 Given thatp∈(1, +∞), under worst case,p→1, plugging inp = 1into the above bound, termM 1 2p and V 1 p can be merged intoM1 2 andV 1 respectively. E[|∇xg(0)(x;t)−∇xg(x;t)|]≤λd N ( M−1 2 t−1 2 +M 0 L λ+M 1 2 (L λ )2 t 1 2 ) (152) E[|∇xg(0)(x;t)−∇xg(x;t)|2]≤λ2d N ( V−1t−1+V 0"},{"citing_arxiv_id":"2605.14411","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Energy-Efficient Quadruped Locomotion with Compliant Feet","primary_cat":"cs.RO","submitted_at":"2026-05-14T05:54:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Tuned foot compliance in quadruped robots lowers locomotion energy consumption by roughly 17 percent relative to rigid or overly soft designs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11138","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ViserDex: Visual Sim-to-Real for Robust Dexterous In-hand Reorientation","primary_cat":"cs.RO","submitted_at":"2026-04-13T07:50:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A framework using 3D Gaussian Splatting for visual domain randomization enables robust monocular RGB-based dexterous in-hand reorientation on real hardware for multiple objects under varied lighting.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.02744","ref_index":9,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Learning Locomotion on Complex Terrain for Quadrupedal Robots with Foot Position Maps and Stability Rewards","primary_cat":"cs.RO","submitted_at":"2026-04-03T05:37:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Integrating foot position maps into heightmaps and adding a locomotion-stability reward in an attention-based RL framework improves quadrupedal success rates on both trained and out-of-domain complex terrains.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"To reliably deploy quadrupedal robots, robust locomotion strategies that can handle complex terrains are needed. Over the years classical optimization-based locomotion methods have been proposed to achieve locomotion on diverse terrain [1]-[5]. More recent methods based on reinforcement learning (RL) propose to learn locomotion through trial and error [6]-[9]. For locomotion over sparse terrain, much research has been conducted on optimization-based methods [5], [10]- [12], RL-based methods [13]-[16], and hybrid methods [17]- [19]. Optimization-based methods exhibit high foot place- ment precision but lack robustness against real world noise and uncertainties. On the other hand, RL-based methods excel in robustness but lack foot placement precision."},{"citing_arxiv_id":"2602.06382","ref_index":32,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Now You See That: Learning End-to-End Humanoid Locomotion from Raw Pixels","primary_cat":"cs.RO","submitted_at":"2026-02-06T04:34:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"An end-to-end policy learns robust humanoid locomotion directly from noisy depth images via high-fidelity sensor simulation, vision-aware distillation from privileged maps, and terrain-specific multi-critic reward shaping.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2410.13149","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Power in Numbers: Primitive Algorithm for Swarm Robot Navigation in Unknown Environments","primary_cat":"cs.RO","submitted_at":"2024-10-17T02:08:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Swarm robots navigate unknown environments using goal direction and neighbor positions only, with mathematical validation, potential-field simulations, and sound-field robot experiments.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.07069","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Optimal Gait Control for a Tendon-driven Soft Quadruped Robot by Model-based Reinforcement Learning","primary_cat":"cs.RO","submitted_at":"2024-06-11T08:56:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Develops and tests a model-based RL controller with post-training for gait in a tendon-driven soft quadruped, reporting improved efficiency and robustness over benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}