{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:5UEZA2UHFCXB5IND22XSI5Q56F","short_pith_number":"pith:5UEZA2UH","schema_version":"1.0","canonical_sha256":"ed09906a8728ae1ea1a3d6af24761df1698479b95e2a6edfb02cb17222fc0d15","source":{"kind":"arxiv","id":"2605.15480","version":1},"attestation_state":"computed","paper":{"title":"Residual Reinforcement Learning for Robot Teleoperation under Stochastic Delays","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"An LSTM state estimator paired with residual RL produces stable robot teleoperation under stochastic delays.","cross_cats":["cs.AI"],"primary_cat":"cs.RO","authors_text":"Kaize Deng, Zewen Yang","submitted_at":"2026-05-14T23:45:59Z","abstract_excerpt":"Stochastic communication delays in teleoperation introduce signal discontinuities that undermine control stability and degrade control performance. Consequently, the conventional reinforcement learning (RL) methods struggle with the delayed observations due to the delay-induced observations, leading to high-frequency chattering. To address this, we propose a hybrid control framework, delay-resilient RL, integrating a state estimator utilizing Long Short-Term Memory (LSTM) with a residual RL policy, which is resilient to stochastic delays. The LSTM reconstructs smooth, continuous state estimate"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.15480","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-14T23:45:59Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"31cef72b916e8610ed3473378c8a259332185108d2e9fea2c7dd336baecb30c1","abstract_canon_sha256":"4c7c11da28e3709e7c21c8d78e8c32f40af3091d5278f9da7f3cf61d0ad606f3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:00.711685Z","signature_b64":"Ch9eGg4xP5euBmSVSJd3a9u1OEMH+TWwFOg4h/P/yMK/7VfgXl06xJWOl9iNTibN70vRA0qN5z1usxf7roHHBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ed09906a8728ae1ea1a3d6af24761df1698479b95e2a6edfb02cb17222fc0d15","last_reissued_at":"2026-05-20T00:01:00.710865Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:00.710865Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Residual Reinforcement Learning for Robot Teleoperation under Stochastic Delays","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"An LSTM state estimator paired with residual RL produces stable robot teleoperation under stochastic delays.","cross_cats":["cs.AI"],"primary_cat":"cs.RO","authors_text":"Kaize Deng, Zewen Yang","submitted_at":"2026-05-14T23:45:59Z","abstract_excerpt":"Stochastic communication delays in teleoperation introduce signal discontinuities that undermine control stability and degrade control performance. Consequently, the conventional reinforcement learning (RL) methods struggle with the delayed observations due to the delay-induced observations, leading to high-frequency chattering. To address this, we propose a hybrid control framework, delay-resilient RL, integrating a state estimator utilizing Long Short-Term Memory (LSTM) with a residual RL policy, which is resilient to stochastic delays. The LSTM reconstructs smooth, continuous state estimate"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experimental validation on Franka Panda robots demonstrates that our approach significantly outperforms the state-of-the-art baselines, ensuring robust and stable teleoperation even under high-variance stochastic delays.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The LSTM can reliably reconstruct smooth, continuous state estimates from delayed and discontinuous observations in a way that does not introduce errors large enough to destabilize the residual RL policy or degrade overall control performance.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"An LSTM state estimator paired with a residual RL policy enables robust robot teleoperation under stochastic delays by reconstructing continuous states and learning compensatory torques, outperforming baselines on Franka Panda robots.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"An LSTM state estimator paired with residual RL produces stable robot teleoperation under stochastic delays.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"6294b24b1de3fe2bfaf0e6de062596bdb94562e03be6b20cc454871819ec2631"},"source":{"id":"2605.15480","kind":"arxiv","version":1},"verdict":{"id":"9bd4cf8d-c454-4b7e-b7d7-320b6497894d","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T14:26:07.129188Z","strongest_claim":"Experimental validation on Franka Panda robots demonstrates that our approach significantly outperforms the state-of-the-art baselines, ensuring robust and stable teleoperation even under high-variance stochastic delays.","one_line_summary":"An LSTM state estimator paired with a residual RL policy enables robust robot teleoperation under stochastic delays by reconstructing continuous states and learning compensatory torques, outperforming baselines on Franka Panda robots.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The LSTM can reliably reconstruct smooth, continuous state estimates from delayed and discontinuous observations in a way that does not introduce errors large enough to destabilize the residual RL policy or degrade overall control performance.","pith_extraction_headline":"An LSTM state estimator paired with residual RL produces stable robot teleoperation under stochastic delays."},"integrity":{"clean":false,"summary":{"advisory":1,"critical":0,"by_detector":{"doi_compliance":{"total":1,"advisory":1,"critical":0,"informational":0}},"informational":0},"endpoint":"/pith/2605.15480/integrity.json","findings":[{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.1109/TCYB.2020.2988820.Appendix) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detector":"doi_compliance","severity":"advisory","ref_index":14,"audited_at":"2026-05-19T14:37:41.651882Z","detected_doi":"10.1109/TCYB.2020.2988820.Appendix","finding_type":"recoverable_identifier","verdict_class":"incontrovertible","detected_arxiv_id":null}],"available":true,"detectors_run":[{"name":"cited_work_retraction","ran_at":"2026-05-19T15:22:01.589566Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T14:37:41.651882Z","status":"completed","version":"1.0.0","findings_count":1},{"name":"doi_title_agreement","ran_at":"2026-05-19T14:31:17.483207Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T14:21:54.080376Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"citation_quote_validity","ran_at":"2026-05-19T13:49:41.405814Z","status":"skipped","version":"0.1.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.656112Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"43fa3dc8d218bb36766e8057e504e8c0953ed32c8dfd794a06f79a09b254aa0b"},"references":{"count":14,"sample":[{"doi":"","year":2020,"title":"Barde, P., Roy, J., de La Saulece, ´E., Calauz` enes, C., and Moinard, V. (2020). At human speed: Deep re- inforcement learning with action delay. InInternational Conference on Learning Representation","work_id":"4ad40f71-be83-4077-b16a-59a79793d40f","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Choi, P.J., Oskouian, R.J., and Tubbs, R.S. (2018). Telesurgery: Past, Present, and Future.Cureus, 10(5)","work_id":"85463e11-af4a-44b0-a317-7e524a714b27","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"Huang, B., Gong, Y., Yang, Z., Ren, T., and Figueredo, L. (2026). Contact-Safe Reinforcement Learning with ProMP Reparameterization and Energy Awareness","work_id":"af14a616-c486-4059-b547-7008094032a7","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1016/j.arcontrol.2019.06.005","year":2019,"title":"Huang, J., Chen, J., and Sun, C. (2019). Reinforcement learning in robotic teleoperation with time delay: A survey.Annual Reviews in Control, 48, 189–203. doi: 10.1016/j.arcontrol.2019.06.005","work_id":"e6e5c763-112c-4ec8-8be0-341ae80a4b86","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1109/icra.2019.8794127","year":2019,"title":"Residual Reinforce- ment Learning for Robot Control","work_id":"7468142b-c0a6-4091-9a0a-8c6768d54afd","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":14,"snapshot_sha256":"3e2ddd8d4faf4b96dd010e0cda8302a727244953f30c285c21f5648a9f56ab3c","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"307ece386c3ddbae403e76988118fb394ffa1e534cbfc0b146c5ea13738207c2"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15480","created_at":"2026-05-20T00:01:00.711003+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15480v1","created_at":"2026-05-20T00:01:00.711003+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15480","created_at":"2026-05-20T00:01:00.711003+00:00"},{"alias_kind":"pith_short_12","alias_value":"5UEZA2UHFCXB","created_at":"2026-05-20T00:01:00.711003+00:00"},{"alias_kind":"pith_short_16","alias_value":"5UEZA2UHFCXB5IND","created_at":"2026-05-20T00:01:00.711003+00:00"},{"alias_kind":"pith_short_8","alias_value":"5UEZA2UH","created_at":"2026-05-20T00:01:00.711003+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F","json":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F.json","graph_json":"https://pith.science/api/pith-number/5UEZA2UHFCXB5IND22XSI5Q56F/graph.json","events_json":"https://pith.science/api/pith-number/5UEZA2UHFCXB5IND22XSI5Q56F/events.json","paper":"https://pith.science/paper/5UEZA2UH"},"agent_actions":{"view_html":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F","download_json":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F.json","view_paper":"https://pith.science/paper/5UEZA2UH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15480&json=true","fetch_graph":"https://pith.science/api/pith-number/5UEZA2UHFCXB5IND22XSI5Q56F/graph.json","fetch_events":"https://pith.science/api/pith-number/5UEZA2UHFCXB5IND22XSI5Q56F/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F/action/storage_attestation","attest_author":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F/action/author_attestation","sign_citation":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F/action/citation_signature","submit_replication":"https://pith.science/pith/5UEZA2UHFCXB5IND22XSI5Q56F/action/replication_record"}},"created_at":"2026-05-20T00:01:00.711003+00:00","updated_at":"2026-05-20T00:01:00.711003+00:00"}