{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:JRQQOFBLXTVMYMF6NHOWZ5XGWN","short_pith_number":"pith:JRQQOFBL","schema_version":"1.0","canonical_sha256":"4c6107142bbceacc30be69dd6cf6e6b37a54abec4d1e97116f246b1b52f62766","source":{"kind":"arxiv","id":"2605.19159","version":1},"attestation_state":"computed","paper":{"title":"On the Geometric Limits of Transformer Defenses against Obfuscation Attacks: Latent Embedding Collapse & Performance Robustness Gap","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Becky Mashaido, Tapadhir Das","submitted_at":"2026-05-18T22:25:47Z","abstract_excerpt":"Prompt injection attacks pose significant risks to language model safety, yet existing defenses are typically evaluated using classification performance. We show that high detection performance does not imply representational robustness. Specifically, multi-operator obfuscated prompts (combining homoglyphs, zero-width characters, and punctuation or emoji noise) can partially collapse onto the embedding manifold of clean prompts, a phenomenon we term latent embedding collapse. Results indicate that across multiple BERT family encoders with varying depth and capacity, detectors achieve near-perf"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19159","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-18T22:25:47Z","cross_cats_sorted":[],"title_canon_sha256":"dccf564d3ea036e7825099b95aaf2b7db2d390558fd5af95495cd1a540b2aea5","abstract_canon_sha256":"15d0208511f69814e3c0860023863073a2763d6683aecb020e36fb48d330f8e6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:30.882793Z","signature_b64":"FftSmegownESQ/zOQoVmMSn2dusn7jvhgJ/yUfv/aW6FTOdhuX10rIAkzbQtsYvlNi2tDc6DQ0BCn/eKJy5SDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4c6107142bbceacc30be69dd6cf6e6b37a54abec4d1e97116f246b1b52f62766","last_reissued_at":"2026-05-20T01:05:30.881972Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:30.881972Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"On the Geometric Limits of Transformer Defenses against Obfuscation Attacks: Latent Embedding Collapse & Performance Robustness Gap","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Becky Mashaido, Tapadhir Das","submitted_at":"2026-05-18T22:25:47Z","abstract_excerpt":"Prompt injection attacks pose significant risks to language model safety, yet existing defenses are typically evaluated using classification performance. We show that high detection performance does not imply representational robustness. Specifically, multi-operator obfuscated prompts (combining homoglyphs, zero-width characters, and punctuation or emoji noise) can partially collapse onto the embedding manifold of clean prompts, a phenomenon we term latent embedding collapse. Results indicate that across multiple BERT family encoders with varying depth and capacity, detectors achieve near-perf"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19159","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19159/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19159","created_at":"2026-05-20T01:05:30.882109+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19159v1","created_at":"2026-05-20T01:05:30.882109+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19159","created_at":"2026-05-20T01:05:30.882109+00:00"},{"alias_kind":"pith_short_12","alias_value":"JRQQOFBLXTVM","created_at":"2026-05-20T01:05:30.882109+00:00"},{"alias_kind":"pith_short_16","alias_value":"JRQQOFBLXTVMYMF6","created_at":"2026-05-20T01:05:30.882109+00:00"},{"alias_kind":"pith_short_8","alias_value":"JRQQOFBL","created_at":"2026-05-20T01:05:30.882109+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN","json":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN.json","graph_json":"https://pith.science/api/pith-number/JRQQOFBLXTVMYMF6NHOWZ5XGWN/graph.json","events_json":"https://pith.science/api/pith-number/JRQQOFBLXTVMYMF6NHOWZ5XGWN/events.json","paper":"https://pith.science/paper/JRQQOFBL"},"agent_actions":{"view_html":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN","download_json":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN.json","view_paper":"https://pith.science/paper/JRQQOFBL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19159&json=true","fetch_graph":"https://pith.science/api/pith-number/JRQQOFBLXTVMYMF6NHOWZ5XGWN/graph.json","fetch_events":"https://pith.science/api/pith-number/JRQQOFBLXTVMYMF6NHOWZ5XGWN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN/action/storage_attestation","attest_author":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN/action/author_attestation","sign_citation":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN/action/citation_signature","submit_replication":"https://pith.science/pith/JRQQOFBLXTVMYMF6NHOWZ5XGWN/action/replication_record"}},"created_at":"2026-05-20T01:05:30.882109+00:00","updated_at":"2026-05-20T01:05:30.882109+00:00"}