{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:KHLRLZVVRXZDAZ3HEGVE2E4EGV","short_pith_number":"pith:KHLRLZVV","schema_version":"1.0","canonical_sha256":"51d715e6b58df230676721aa4d13843578927a8ce61886b92ec20d25e77ba7d1","source":{"kind":"arxiv","id":"2504.21072","version":2},"attestation_state":"computed","paper":{"title":"Erased but Not Forgotten: How Backdoors Compromise Concept Erasure","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Anna Rohrbach, Jonas Henry Grebe, Marcus Rohrbach, Tobias Braun","submitted_at":"2025-04-29T16:13:06Z","abstract_excerpt":"The expansion of text-to-image diffusion models has raised concerns about harmful outputs, from fabricated depictions of public figures to sexually explicit imagery. To mitigate such risks, prior work has proposed concept erasure methods that aim to sever unwanted concepts from the model via fine-tuning, yet it remains unclear whether these approaches truly remove all links to the harmful concept or merely conceal superficial connections. In this work, we reveal a critical vulnerability, the Erasure Evasion Backdoor (EEB): an adversary binds a backdoor trigger to a concept slated for removal, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2504.21072","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2025-04-29T16:13:06Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"33af044d640b7523dc4480146f3ea5e347149c549545a390fa40d0668a4ec2bd","abstract_canon_sha256":"608206e38717fae043d56ec1776cf3b2275fef6f82a37cf9c37799ec91e6ba9c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:31.415956Z","signature_b64":"e2yLnjZs4DJPWR/eHwlx6rpO3T1k47cH51GUxDHDODxhGH0SVRBe0C+8xAMNLv7rx/b5IhCLuDiQ0u6YjLr6BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"51d715e6b58df230676721aa4d13843578927a8ce61886b92ec20d25e77ba7d1","last_reissued_at":"2026-06-02T01:03:31.415475Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:31.415475Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Erased but Not Forgotten: How Backdoors Compromise Concept Erasure","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Anna Rohrbach, Jonas Henry Grebe, Marcus Rohrbach, Tobias Braun","submitted_at":"2025-04-29T16:13:06Z","abstract_excerpt":"The expansion of text-to-image diffusion models has raised concerns about harmful outputs, from fabricated depictions of public figures to sexually explicit imagery. To mitigate such risks, prior work has proposed concept erasure methods that aim to sever unwanted concepts from the model via fine-tuning, yet it remains unclear whether these approaches truly remove all links to the harmful concept or merely conceal superficial connections. In this work, we reveal a critical vulnerability, the Erasure Evasion Backdoor (EEB): an adversary binds a backdoor trigger to a concept slated for removal, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2504.21072","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2504.21072/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2504.21072","created_at":"2026-06-02T01:03:31.415533+00:00"},{"alias_kind":"arxiv_version","alias_value":"2504.21072v2","created_at":"2026-06-02T01:03:31.415533+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.21072","created_at":"2026-06-02T01:03:31.415533+00:00"},{"alias_kind":"pith_short_12","alias_value":"KHLRLZVVRXZD","created_at":"2026-06-02T01:03:31.415533+00:00"},{"alias_kind":"pith_short_16","alias_value":"KHLRLZVVRXZDAZ3H","created_at":"2026-06-02T01:03:31.415533+00:00"},{"alias_kind":"pith_short_8","alias_value":"KHLRLZVV","created_at":"2026-06-02T01:03:31.415533+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.18150","citing_title":"Whispers in the Noise: Surrogate-Guided Concept Awakening via a Multi-Agent Framework","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19227","citing_title":"Token by Token, Compromised: Backdoor Vulnerabilities in Unified Autoregressive Models","ref_index":20,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV","json":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV.json","graph_json":"https://pith.science/api/pith-number/KHLRLZVVRXZDAZ3HEGVE2E4EGV/graph.json","events_json":"https://pith.science/api/pith-number/KHLRLZVVRXZDAZ3HEGVE2E4EGV/events.json","paper":"https://pith.science/paper/KHLRLZVV"},"agent_actions":{"view_html":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV","download_json":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV.json","view_paper":"https://pith.science/paper/KHLRLZVV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2504.21072&json=true","fetch_graph":"https://pith.science/api/pith-number/KHLRLZVVRXZDAZ3HEGVE2E4EGV/graph.json","fetch_events":"https://pith.science/api/pith-number/KHLRLZVVRXZDAZ3HEGVE2E4EGV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV/action/storage_attestation","attest_author":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV/action/author_attestation","sign_citation":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV/action/citation_signature","submit_replication":"https://pith.science/pith/KHLRLZVVRXZDAZ3HEGVE2E4EGV/action/replication_record"}},"created_at":"2026-06-02T01:03:31.415533+00:00","updated_at":"2026-06-02T01:03:31.415533+00:00"}