{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2022:U7MKIHTAG55AVTMND75WNOZIPN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b1dc77bf137ee54d0a3622710c2b747760b2b27507ed3759239ee53333661129","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2022-10-19T17:57:24Z","title_canon_sha256":"828933039c4c317aa5ec78b16c8224f4a9217cef79568077711222fe140f836d"},"schema_version":"1.0","source":{"id":"2210.10765","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2210.10765","created_at":"2026-07-05T05:08:23Z"},{"alias_kind":"arxiv_version","alias_value":"2210.10765v1","created_at":"2026-07-05T05:08:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2210.10765","created_at":"2026-07-05T05:08:23Z"},{"alias_kind":"pith_short_12","alias_value":"U7MKIHTAG55A","created_at":"2026-07-05T05:08:23Z"},{"alias_kind":"pith_short_16","alias_value":"U7MKIHTAG55AVTMN","created_at":"2026-07-05T05:08:23Z"},{"alias_kind":"pith_short_8","alias_value":"U7MKIHTA","created_at":"2026-07-05T05:08:23Z"}],"graph_snapshots":[{"event_id":"sha256:6bd963133e621e0b5b01b01c12200ff51ad8566f1d2eb7151bf480dcd080ad75","target":"graph","created_at":"2026-07-05T05:08:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2210.10765/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"A long-term goal of reinforcement learning is to design agents that can autonomously interact and learn in the world. A critical challenge to such autonomy is the presence of irreversible states which require external assistance to recover from, such as when a robot arm has pushed an object off of a table. While standard agents require constant monitoring to decide when to intervene, we aim to design proactive agents that can request human intervention only when needed. To this end, we propose an algorithm that efficiently learns to detect and avoid states that are irreversible, and proactivel","authors_text":"Annie Xie, Archit Sharma, Chelsea Finn, Fahim Tajwar","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2022-10-19T17:57:24Z","title":"When to Ask for Help: Proactive Interventions in Autonomous Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2210.10765","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bafe6ae6286db5e698842b6708d9a31910dffba2f610c4a2834a5afca79d7830","target":"record","created_at":"2026-07-05T05:08:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b1dc77bf137ee54d0a3622710c2b747760b2b27507ed3759239ee53333661129","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2022-10-19T17:57:24Z","title_canon_sha256":"828933039c4c317aa5ec78b16c8224f4a9217cef79568077711222fe140f836d"},"schema_version":"1.0","source":{"id":"2210.10765","kind":"arxiv","version":1}},"canonical_sha256":"a7d8a41e60377a0acd8d1ffb66bb287b50e38a0d60e93fbed9d30921e4004ebe","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a7d8a41e60377a0acd8d1ffb66bb287b50e38a0d60e93fbed9d30921e4004ebe","first_computed_at":"2026-07-05T05:08:23.548993Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T05:08:23.548993Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"faYG4/M5WsWN7GETAwTNijG2QCv6Sxroc7QPRvMky2f8eWf3OYnaSO/8eNs9u6nrVzSS4HqxJVuTyoiehlCnBg==","signature_status":"signed_v1","signed_at":"2026-07-05T05:08:23.549503Z","signed_message":"canonical_sha256_bytes"},"source_id":"2210.10765","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bafe6ae6286db5e698842b6708d9a31910dffba2f610c4a2834a5afca79d7830","sha256:6bd963133e621e0b5b01b01c12200ff51ad8566f1d2eb7151bf480dcd080ad75"],"state_sha256":"87770635b68328e8d3d52528644b17ea6d49965bbcb02d8f4d463f44fe815e44"}