{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PSXBG24N2KVMXOVPBSIW6E22JQ","short_pith_number":"pith:PSXBG24N","schema_version":"1.0","canonical_sha256":"7cae136b8dd2aacbbaaf0c916f135a4c06c4b117d4d0b74026bc56d698ffe87b","source":{"kind":"arxiv","id":"2606.00341","version":1},"attestation_state":"computed","paper":{"title":"ROGUE: Misaligned Agent Behavior Arising from Ordinary Computer Use","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Abishek Anand, Aran Nayebi, Jeremy Tien, J. Zico Kolter, Yuchen Shen, Yu-Rou Tuan","submitted_at":"2026-05-29T20:29:35Z","abstract_excerpt":"As AI agents are increasingly deployed in real personal and corporate settings (email accounts, development workflows, company databases, etc.), safety considerations surrounding these agents become paramount. Although much work has focused on agent safety in the presence of an adversary, we show that agents can exhibit misaligned behavior even in benign settings, taking unsafe actions when those actions are instrumental to task completion. We study this failure mode through the lens of corrigibility, the safety desideratum that agents remain amenable to human correction, interruption, or shut"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.00341","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T20:29:35Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"e25c51d21b374eebfebfe9b495183d9d75af0822ab0baefefe8a8955d3061082","abstract_canon_sha256":"d776a4517c3d00aa37f1918115532ee70bce418fb94f2b6b1ef27ace1733afeb"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:52.122623Z","signature_b64":"mZ1buIdKhNvOUW8AMvIZ2JN7kTzOgGjCM7w6ys8+io6VGFE0gJIDPN/G9b5yJTGjZ0hofMPR8JCt8ZxoJpkgAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7cae136b8dd2aacbbaaf0c916f135a4c06c4b117d4d0b74026bc56d698ffe87b","last_reissued_at":"2026-06-02T01:03:52.122210Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:52.122210Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"ROGUE: Misaligned Agent Behavior Arising from Ordinary Computer Use","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Abishek Anand, Aran Nayebi, Jeremy Tien, J. Zico Kolter, Yuchen Shen, Yu-Rou Tuan","submitted_at":"2026-05-29T20:29:35Z","abstract_excerpt":"As AI agents are increasingly deployed in real personal and corporate settings (email accounts, development workflows, company databases, etc.), safety considerations surrounding these agents become paramount. Although much work has focused on agent safety in the presence of an adversary, we show that agents can exhibit misaligned behavior even in benign settings, taking unsafe actions when those actions are instrumental to task completion. We study this failure mode through the lens of corrigibility, the safety desideratum that agents remain amenable to human correction, interruption, or shut"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.00341","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.00341/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.00341","created_at":"2026-06-02T01:03:52.122270+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.00341v1","created_at":"2026-06-02T01:03:52.122270+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.00341","created_at":"2026-06-02T01:03:52.122270+00:00"},{"alias_kind":"pith_short_12","alias_value":"PSXBG24N2KVM","created_at":"2026-06-02T01:03:52.122270+00:00"},{"alias_kind":"pith_short_16","alias_value":"PSXBG24N2KVMXOVP","created_at":"2026-06-02T01:03:52.122270+00:00"},{"alias_kind":"pith_short_8","alias_value":"PSXBG24N","created_at":"2026-06-02T01:03:52.122270+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ","json":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ.json","graph_json":"https://pith.science/api/pith-number/PSXBG24N2KVMXOVPBSIW6E22JQ/graph.json","events_json":"https://pith.science/api/pith-number/PSXBG24N2KVMXOVPBSIW6E22JQ/events.json","paper":"https://pith.science/paper/PSXBG24N"},"agent_actions":{"view_html":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ","download_json":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ.json","view_paper":"https://pith.science/paper/PSXBG24N","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.00341&json=true","fetch_graph":"https://pith.science/api/pith-number/PSXBG24N2KVMXOVPBSIW6E22JQ/graph.json","fetch_events":"https://pith.science/api/pith-number/PSXBG24N2KVMXOVPBSIW6E22JQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ/action/storage_attestation","attest_author":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ/action/author_attestation","sign_citation":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ/action/citation_signature","submit_replication":"https://pith.science/pith/PSXBG24N2KVMXOVPBSIW6E22JQ/action/replication_record"}},"created_at":"2026-06-02T01:03:52.122270+00:00","updated_at":"2026-06-02T01:03:52.122270+00:00"}