{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ZSPSTNHOEYX7RY7VV4ZN6WS665","short_pith_number":"pith:ZSPSTNHO","canonical_record":{"source":{"id":"2605.19919","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-19T14:43:26Z","cross_cats_sorted":[],"title_canon_sha256":"b9fc345ea597241c6549380f28da65ac00a67a7ff69577e3d236b8956b1a7608","abstract_canon_sha256":"bf2ed375a3c6d9ad5077d395e12ab72998bddafb1f7b9c78448744bfa33844a9"},"schema_version":"1.0"},"canonical_sha256":"cc9f29b4ee262ff8e3f5af32df5a5ef7458832b25a291e6bda328907ec57a7f9","source":{"kind":"arxiv","id":"2605.19919","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19919","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19919v1","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19919","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"pith_short_12","alias_value":"ZSPSTNHOEYX7","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"pith_short_16","alias_value":"ZSPSTNHOEYX7RY7V","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"pith_short_8","alias_value":"ZSPSTNHO","created_at":"2026-05-20T02:05:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ZSPSTNHOEYX7RY7VV4ZN6WS665","target":"record","payload":{"canonical_record":{"source":{"id":"2605.19919","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-19T14:43:26Z","cross_cats_sorted":[],"title_canon_sha256":"b9fc345ea597241c6549380f28da65ac00a67a7ff69577e3d236b8956b1a7608","abstract_canon_sha256":"bf2ed375a3c6d9ad5077d395e12ab72998bddafb1f7b9c78448744bfa33844a9"},"schema_version":"1.0"},"canonical_sha256":"cc9f29b4ee262ff8e3f5af32df5a5ef7458832b25a291e6bda328907ec57a7f9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T02:05:55.293876Z","signature_b64":"75l4YgzPVl9zXNvqMFyVg8LFQJEGG7CZIt8zdtMaq5fwUj99ab7itjaPSwW35PbuFR43fHmNy7PSXm56K6M0DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cc9f29b4ee262ff8e3f5af32df5a5ef7458832b25a291e6bda328907ec57a7f9","last_reissued_at":"2026-05-20T02:05:55.292774Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T02:05:55.292774Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.19919","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T02:05:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OTt/VLa7xtX8aVCZUlk/8UUpbQ7qlezAYCxByI9DzLFNi3ZyvPJV7ymlysff42Un+mwjFZpIbyjqTRKTDTbDAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T11:58:15.557382Z"},"content_sha256":"38489401fff9790f3d05fc8273945b7e5dd593c0c99f0794b36180e96bef04f6","schema_version":"1.0","event_id":"sha256:38489401fff9790f3d05fc8273945b7e5dd593c0c99f0794b36180e96bef04f6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ZSPSTNHOEYX7RY7VV4ZN6WS665","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond Action Residuals: Real-World Robot Policy Steering via Bottleneck Latent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Dongjie Yu, Huazhe Xu, Jia Pan, Kun Lei, Zhennan Jiang","submitted_at":"2026-05-19T14:43:26Z","abstract_excerpt":"Pretrained imitation policies have become a strong foundation for robot manipulation, but they often require online improvement to overcome execution errors, limited dataset coverage, and deployment mismatch. A central question is therefore how reinforcement learning (RL) should adapt policies after offline pretraining. Existing lightweight methods commonly apply residual corrections directly in action space, but this often leads to noisy and poorly structured exploration. In this work, we propose Z-Perturbation Reinforcement Learning (ZPRL), an approach that steers pretrained policies through"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19919","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19919/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T02:05:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I2K2DXTdwwm94SKI+RYdmH0VAK5pnuf+HNu5piQk+aLBj40MlSP/RqweUOV4+SwhXS2WhbGpqMsfDOE3JOALAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T11:58:15.558053Z"},"content_sha256":"734f37a8273ff5c92c9e5076dacc4bc6ca621dce450232c0fe615a51a739b4f8","schema_version":"1.0","event_id":"sha256:734f37a8273ff5c92c9e5076dacc4bc6ca621dce450232c0fe615a51a739b4f8"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:ZSPSTNHOEYX7RY7VV4ZN6WS665","target":"integrity","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.1162/necoa) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"A. J. Ijspeert, J. Nakanishi, H. Hoffmannet al., “Dynamical movement primitives: Learning attractor models for motor behaviors,”Neural Comput., vol. 25, no. 2, pp. 328–373, 02 2013. [Online]. Available: https://doi.org/10.1162/NECO a 00393","arxiv_id":"2605.19919","detector":"doi_compliance","evidence":{"ref_index":27,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"A. J. Ijspeert, J. Nakanishi, H. Hoffmannet al., “Dynamical movement primitives: Learning attractor models for motor behaviors,”Neural Comput., vol. 25, no. 2, pp. 328–373, 02 2013. [Online]. Available: https://doi.org/10.1162/NECO a 00393","reconstructed_doi":"10.1162/necoa"},"severity":"advisory","ref_index":27,"audited_at":"2026-05-20T05:19:01.947726Z","event_type":"pith.integrity.v1","detected_doi":"10.1162/necoa","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"0510728eb9eefcb3bc28bdf860c29260bad4a80b4ea63be2afc5256c8bc39bac","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":4803,"payload_sha256":"7f72615a1470fec5919ca0c2df8afd17ff913c7fb8509e66ba6967be6f9da979","signature_b64":"EskSK8Bwa08YZg6mKuImZ7wO5ItcSxC0APrIlgRngk83cv5JDlEmUMeIZuwFsMtf+c64zZn9mFkU5apyaGdsDQ==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T05:22:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yATgZXKNr477Hsw2FQ3L1L/1ju1JJOuB/57/LSd6yAwuVtVKfTz8C5NM1uqME3Y2qCvAbipvz/qn5homxWgiBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T11:58:15.559569Z"},"content_sha256":"1063301f3e35417e1a6dc428de27d14ce3f1d4d8547d81cfab569b542ee00e7f","schema_version":"1.0","event_id":"sha256:1063301f3e35417e1a6dc428de27d14ce3f1d4d8547d81cfab569b542ee00e7f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665/bundle.json","state_url":"https://pith.science/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T11:58:15Z","links":{"resolver":"https://pith.science/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665","bundle":"https://pith.science/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665/bundle.json","state":"https://pith.science/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZSPSTNHOEYX7RY7VV4ZN6WS665/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ZSPSTNHOEYX7RY7VV4ZN6WS665","merge_version":"pith-open-graph-merge-v1","event_count":3,"valid_event_count":3,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bf2ed375a3c6d9ad5077d395e12ab72998bddafb1f7b9c78448744bfa33844a9","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-19T14:43:26Z","title_canon_sha256":"b9fc345ea597241c6549380f28da65ac00a67a7ff69577e3d236b8956b1a7608"},"schema_version":"1.0","source":{"id":"2605.19919","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19919","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19919v1","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19919","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"pith_short_12","alias_value":"ZSPSTNHOEYX7","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"pith_short_16","alias_value":"ZSPSTNHOEYX7RY7V","created_at":"2026-05-20T02:05:55Z"},{"alias_kind":"pith_short_8","alias_value":"ZSPSTNHO","created_at":"2026-05-20T02:05:55Z"}],"graph_snapshots":[{"event_id":"sha256:734f37a8273ff5c92c9e5076dacc4bc6ca621dce450232c0fe615a51a739b4f8","target":"graph","created_at":"2026-05-20T02:05:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.19919/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pretrained imitation policies have become a strong foundation for robot manipulation, but they often require online improvement to overcome execution errors, limited dataset coverage, and deployment mismatch. A central question is therefore how reinforcement learning (RL) should adapt policies after offline pretraining. Existing lightweight methods commonly apply residual corrections directly in action space, but this often leads to noisy and poorly structured exploration. In this work, we propose Z-Perturbation Reinforcement Learning (ZPRL), an approach that steers pretrained policies through","authors_text":"Dongjie Yu, Huazhe Xu, Jia Pan, Kun Lei, Zhennan Jiang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-19T14:43:26Z","title":"Beyond Action Residuals: Real-World Robot Policy Steering via Bottleneck Latent Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19919","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:38489401fff9790f3d05fc8273945b7e5dd593c0c99f0794b36180e96bef04f6","target":"record","created_at":"2026-05-20T02:05:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bf2ed375a3c6d9ad5077d395e12ab72998bddafb1f7b9c78448744bfa33844a9","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-19T14:43:26Z","title_canon_sha256":"b9fc345ea597241c6549380f28da65ac00a67a7ff69577e3d236b8956b1a7608"},"schema_version":"1.0","source":{"id":"2605.19919","kind":"arxiv","version":1}},"canonical_sha256":"cc9f29b4ee262ff8e3f5af32df5a5ef7458832b25a291e6bda328907ec57a7f9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cc9f29b4ee262ff8e3f5af32df5a5ef7458832b25a291e6bda328907ec57a7f9","first_computed_at":"2026-05-20T02:05:55.292774Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T02:05:55.292774Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"75l4YgzPVl9zXNvqMFyVg8LFQJEGG7CZIt8zdtMaq5fwUj99ab7itjaPSwW35PbuFR43fHmNy7PSXm56K6M0DA==","signature_status":"signed_v1","signed_at":"2026-05-20T02:05:55.293876Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.19919","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:38489401fff9790f3d05fc8273945b7e5dd593c0c99f0794b36180e96bef04f6","sha256:734f37a8273ff5c92c9e5076dacc4bc6ca621dce450232c0fe615a51a739b4f8","sha256:1063301f3e35417e1a6dc428de27d14ce3f1d4d8547d81cfab569b542ee00e7f"],"state_sha256":"b2d8b69c9bb70a10a66a09c3affb683c920634c45631a37ccaf6b763c51cf6ff"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dT6oqetS7luE980k6eX8rmkX/Dz/zD/0paHMPrm/vP5oo3GWyEXnDliGXMX09mdk5yC5bHpfo/TqpblIwkyEAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T11:58:15.563115Z","bundle_sha256":"bd535c52224125af09f32ec500b67067c0384c2dabc8ee030c3025bc8c4746a6"}}