{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:B5ISACWY42VG7WEWE5VDQ7WC2W","short_pith_number":"pith:B5ISACWY","schema_version":"1.0","canonical_sha256":"0f51200ad8e6aa6fd896276a387ec2d5aa90d773642d679bcdfd2665131da133","source":{"kind":"arxiv","id":"2606.31958","version":1},"attestation_state":"computed","paper":{"title":"Adapting Generalist Robot Policies with Semantic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Andrew Wagenmaker, Jagdeep Singh Bhatia, Sergey Levine, William Chen","submitted_at":"2026-06-30T17:00:33Z","abstract_excerpt":"Generalist robot policies learn a diverse repertoire of behaviors from large-scale pretraining. In principle, this makes them excellent priors for downstream adaptation via reinforcement learning (RL). In practice, however, standard RL methods leveraging this prior optimize directly over robot actions, requiring the base policy's action distribution to be close to that of a performant policy from the start. This assumption breaks down for complex or long-horizon tasks that fall outside the pretraining distribution. Our key insight is that, for sufficiently expressive generalist policies, langu"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.31958","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-06-30T17:00:33Z","cross_cats_sorted":[],"title_canon_sha256":"adbd714556209b3400824463c944272ff5e9855f7cb9967b0f7ad29a9a5cd66c","abstract_canon_sha256":"68c8f04f25bf31ee4eb860e0b0d4201639a9134ffae3f708f3660fa11d3a1eb4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:18:27.005810Z","signature_b64":"5bdPAXynYOnH/qSvZkkgaE4h/hLVtAPPh/4kclCqBssbvoORyjDj1iOqPhh7cIfOvUOfn6ZPr2OXMY6pAfzYDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0f51200ad8e6aa6fd896276a387ec2d5aa90d773642d679bcdfd2665131da133","last_reissued_at":"2026-07-01T01:18:27.005357Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:18:27.005357Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Adapting Generalist Robot Policies with Semantic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Andrew Wagenmaker, Jagdeep Singh Bhatia, Sergey Levine, William Chen","submitted_at":"2026-06-30T17:00:33Z","abstract_excerpt":"Generalist robot policies learn a diverse repertoire of behaviors from large-scale pretraining. In principle, this makes them excellent priors for downstream adaptation via reinforcement learning (RL). In practice, however, standard RL methods leveraging this prior optimize directly over robot actions, requiring the base policy's action distribution to be close to that of a performant policy from the start. This assumption breaks down for complex or long-horizon tasks that fall outside the pretraining distribution. Our key insight is that, for sufficiently expressive generalist policies, langu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.31958","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.31958/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.31958","created_at":"2026-07-01T01:18:27.005424+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.31958v1","created_at":"2026-07-01T01:18:27.005424+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.31958","created_at":"2026-07-01T01:18:27.005424+00:00"},{"alias_kind":"pith_short_12","alias_value":"B5ISACWY42VG","created_at":"2026-07-01T01:18:27.005424+00:00"},{"alias_kind":"pith_short_16","alias_value":"B5ISACWY42VG7WEW","created_at":"2026-07-01T01:18:27.005424+00:00"},{"alias_kind":"pith_short_8","alias_value":"B5ISACWY","created_at":"2026-07-01T01:18:27.005424+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W","json":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W.json","graph_json":"https://pith.science/api/pith-number/B5ISACWY42VG7WEWE5VDQ7WC2W/graph.json","events_json":"https://pith.science/api/pith-number/B5ISACWY42VG7WEWE5VDQ7WC2W/events.json","paper":"https://pith.science/paper/B5ISACWY"},"agent_actions":{"view_html":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W","download_json":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W.json","view_paper":"https://pith.science/paper/B5ISACWY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.31958&json=true","fetch_graph":"https://pith.science/api/pith-number/B5ISACWY42VG7WEWE5VDQ7WC2W/graph.json","fetch_events":"https://pith.science/api/pith-number/B5ISACWY42VG7WEWE5VDQ7WC2W/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W/action/timestamp_anchor","attest_storage":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W/action/storage_attestation","attest_author":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W/action/author_attestation","sign_citation":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W/action/citation_signature","submit_replication":"https://pith.science/pith/B5ISACWY42VG7WEWE5VDQ7WC2W/action/replication_record"}},"created_at":"2026-07-01T01:18:27.005424+00:00","updated_at":"2026-07-01T01:18:27.005424+00:00"}