{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ADI2R24GDUMXRMYWBJFDG34VU7","short_pith_number":"pith:ADI2R24G","schema_version":"1.0","canonical_sha256":"00d1a8eb861d1978b3160a4a336f95a7cfbbf9a09f40ffdc104300fd4cd6cea6","source":{"kind":"arxiv","id":"1812.06298","version":2},"attestation_state":"computed","paper":{"title":"Residual Policy Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.RO","authors_text":"Josh Tenenbaum, Kelsey Allen, Leslie Kaelbling, Tom Silver","submitted_at":"2018-12-15T14:47:21Z","abstract_excerpt":"We present Residual Policy Learning (RPL): a simple method for improving nondifferentiable policies using model-free deep reinforcement learning. RPL thrives in complex robotic manipulation tasks where good but imperfect controllers are available. In these tasks, reinforcement learning from scratch remains data-inefficient or intractable, but learning a residual on top of the initial controller can yield substantial improvements. We study RPL in six challenging MuJoCo tasks involving partial observability, sensor noise, model misspecification, and controller miscalibration. For initial control"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.06298","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-12-15T14:47:21Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"b9887412f3287d9e23f834b3dcbebd9b38a390077abfcd986052d4a5d356073a","abstract_canon_sha256":"7a6fec54c379495a3e832d84dab2aea696fe4df7257f6e506f49fba52e9abce9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:02.403211Z","signature_b64":"O0bGRBLcx9NTU0f2p4F1apYG5V1aYWJsLPKgQ8X/KIfNEkVtXHDqR+BLfanwTZAnLOBD9GdOgd+wZgpE0gtdCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"00d1a8eb861d1978b3160a4a336f95a7cfbbf9a09f40ffdc104300fd4cd6cea6","last_reissued_at":"2026-05-17T23:57:02.402520Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:02.402520Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Residual Policy Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.RO","authors_text":"Josh Tenenbaum, Kelsey Allen, Leslie Kaelbling, Tom Silver","submitted_at":"2018-12-15T14:47:21Z","abstract_excerpt":"We present Residual Policy Learning (RPL): a simple method for improving nondifferentiable policies using model-free deep reinforcement learning. RPL thrives in complex robotic manipulation tasks where good but imperfect controllers are available. In these tasks, reinforcement learning from scratch remains data-inefficient or intractable, but learning a residual on top of the initial controller can yield substantial improvements. We study RPL in six challenging MuJoCo tasks involving partial observability, sensor noise, model misspecification, and controller miscalibration. For initial control"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.06298","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.06298","created_at":"2026-05-17T23:57:02.402646+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.06298v2","created_at":"2026-05-17T23:57:02.402646+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.06298","created_at":"2026-05-17T23:57:02.402646+00:00"},{"alias_kind":"pith_short_12","alias_value":"ADI2R24GDUMX","created_at":"2026-05-18T12:32:13.499390+00:00"},{"alias_kind":"pith_short_16","alias_value":"ADI2R24GDUMXRMYW","created_at":"2026-05-18T12:32:13.499390+00:00"},{"alias_kind":"pith_short_8","alias_value":"ADI2R24G","created_at":"2026-05-18T12:32:13.499390+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":11,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2605.22082","citing_title":"CoRMA: Contrastive RMA for Contact-Rich Meta-Adaptation","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19919","citing_title":"Beyond Action Residuals: Real-World Robot Policy Steering via Bottleneck Latent Reinforcement Learning","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2602.22474","citing_title":"When to Act, Ask, or Learn: Uncertainty-Aware Policy Steering","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2603.15757","citing_title":"You've Got a Golden Ticket: Improving Generative Robot Policies With A Single Noise Vector","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2603.15956","citing_title":"ExpertGen: Scalable Sim-to-Real Expert Policy Learning from Imperfect Behavior Priors","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11369","citing_title":"Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers","ref_index":47,"is_internal_anchor":false},{"citing_arxiv_id":"2604.27411","citing_title":"Detecting is Easy, Adapting is Hard: Local Expert Growth for Visual Model-Based Reinforcement Learning under Distribution Shift","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05172","citing_title":"When Life Gives You BC, Make Q-functions: Extracting Q-values from Behavior Cloning for On-Robot Reinforcement Learning","ref_index":52,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07945","citing_title":"Incremental Residual Reinforcement Learning Toward Real-World Learning for Social Navigation","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17787","citing_title":"AnchorRefine: Synergy-Manipulation Based on Trajectory Anchor and Residual Refinement for Vision-Language-Action Models","ref_index":40,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17919","citing_title":"Fisher Decorator: Refining Flow Policy via a Local Transport Map","ref_index":68,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7","json":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7.json","graph_json":"https://pith.science/api/pith-number/ADI2R24GDUMXRMYWBJFDG34VU7/graph.json","events_json":"https://pith.science/api/pith-number/ADI2R24GDUMXRMYWBJFDG34VU7/events.json","paper":"https://pith.science/paper/ADI2R24G"},"agent_actions":{"view_html":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7","download_json":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7.json","view_paper":"https://pith.science/paper/ADI2R24G","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.06298&json=true","fetch_graph":"https://pith.science/api/pith-number/ADI2R24GDUMXRMYWBJFDG34VU7/graph.json","fetch_events":"https://pith.science/api/pith-number/ADI2R24GDUMXRMYWBJFDG34VU7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7/action/storage_attestation","attest_author":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7/action/author_attestation","sign_citation":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7/action/citation_signature","submit_replication":"https://pith.science/pith/ADI2R24GDUMXRMYWBJFDG34VU7/action/replication_record"}},"created_at":"2026-05-17T23:57:02.402646+00:00","updated_at":"2026-05-17T23:57:02.402646+00:00"}