{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:QKAZES5KHESTCQ2FOJZSBNP4J2","short_pith_number":"pith:QKAZES5K","schema_version":"1.0","canonical_sha256":"8281924baa3925314345727320b5fc4ea32bd1cc46e57ba3871ceafb30f2636c","source":{"kind":"arxiv","id":"2606.08892","version":1},"attestation_state":"computed","paper":{"title":"Diffuse AI Control on Fuzzy Tasks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Caglar Gulcehre, Joe Benton, Mikhail Terekhov, Vivek Hebbar","submitted_at":"2026-06-08T00:35:48Z","abstract_excerpt":"AI models deployed in critical domains, such as AI safety research, may subtly sabotage our efforts due to misalignment. Diffuse AI Control is a subfield of AI safety concerned with mitigating risks from AI sabotage distributed over long deployment horizons (diffuse threats). These risks are particularly pernicious on fuzzy tasks, i.e. tasks which are hard to grade or require intuition. To understand diffuse threats on fuzzy tasks, we introduce a novel framework that considers AI control as an adversarial game between a blue team and a red team. The blue team uses a weak trusted model to const"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.08892","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-08T00:35:48Z","cross_cats_sorted":[],"title_canon_sha256":"05ff07e31af4fdf920cdce130769cd8a2170aca22f5a5a7fde3753ef4eb73d09","abstract_canon_sha256":"f4e9bbe364120333da50cce48c4f3538f11eba0c2d684587fa9b01c20ecce100"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:45.609157Z","signature_b64":"a6dO9Czku7P1Nnb1m2TssWZXNCByFgN62ltQZSmZmNhtv8I+p1E2InH/a52TA1VV4q+Gop7L72bwddUthLwQBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8281924baa3925314345727320b5fc4ea32bd1cc46e57ba3871ceafb30f2636c","last_reissued_at":"2026-06-09T02:07:45.603811Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:45.603811Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Diffuse AI Control on Fuzzy Tasks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Caglar Gulcehre, Joe Benton, Mikhail Terekhov, Vivek Hebbar","submitted_at":"2026-06-08T00:35:48Z","abstract_excerpt":"AI models deployed in critical domains, such as AI safety research, may subtly sabotage our efforts due to misalignment. Diffuse AI Control is a subfield of AI safety concerned with mitigating risks from AI sabotage distributed over long deployment horizons (diffuse threats). These risks are particularly pernicious on fuzzy tasks, i.e. tasks which are hard to grade or require intuition. To understand diffuse threats on fuzzy tasks, we introduce a novel framework that considers AI control as an adversarial game between a blue team and a red team. The blue team uses a weak trusted model to const"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08892","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08892/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.08892","created_at":"2026-06-09T02:07:45.607349+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.08892v1","created_at":"2026-06-09T02:07:45.607349+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08892","created_at":"2026-06-09T02:07:45.607349+00:00"},{"alias_kind":"pith_short_12","alias_value":"QKAZES5KHEST","created_at":"2026-06-09T02:07:45.607349+00:00"},{"alias_kind":"pith_short_16","alias_value":"QKAZES5KHESTCQ2F","created_at":"2026-06-09T02:07:45.607349+00:00"},{"alias_kind":"pith_short_8","alias_value":"QKAZES5K","created_at":"2026-06-09T02:07:45.607349+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2","json":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2.json","graph_json":"https://pith.science/api/pith-number/QKAZES5KHESTCQ2FOJZSBNP4J2/graph.json","events_json":"https://pith.science/api/pith-number/QKAZES5KHESTCQ2FOJZSBNP4J2/events.json","paper":"https://pith.science/paper/QKAZES5K"},"agent_actions":{"view_html":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2","download_json":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2.json","view_paper":"https://pith.science/paper/QKAZES5K","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.08892&json=true","fetch_graph":"https://pith.science/api/pith-number/QKAZES5KHESTCQ2FOJZSBNP4J2/graph.json","fetch_events":"https://pith.science/api/pith-number/QKAZES5KHESTCQ2FOJZSBNP4J2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2/action/storage_attestation","attest_author":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2/action/author_attestation","sign_citation":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2/action/citation_signature","submit_replication":"https://pith.science/pith/QKAZES5KHESTCQ2FOJZSBNP4J2/action/replication_record"}},"created_at":"2026-06-09T02:07:45.607349+00:00","updated_at":"2026-06-09T02:07:45.607349+00:00"}