{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:3QSICUOKE2LEEGFJFFMZYIFRLC","short_pith_number":"pith:3QSICUOK","schema_version":"1.0","canonical_sha256":"dc248151ca26964218a929599c20b158a4af01a3cc4cebfc5d980b742aa4abd7","source":{"kind":"arxiv","id":"2605.25194","version":1},"attestation_state":"computed","paper":{"title":"Localization then Neutralization: Gradient-guided Token Suppression against Visual Prompt Injection Attack","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Dongpeng Zhang, Gaozheng Pei, Ke Ma, Longtao Huang, Qianqian Xu, Qingming Huang, Yangbangyan Jiang","submitted_at":"2026-05-24T17:51:34Z","abstract_excerpt":"Adversarial images pose a severe security threat to multimodal large language models through prompt injection. Existing defenses largely lack a principled understanding of the underlying mechanisms and struggle to balance efficiency and defense utility. In this work, we show that successful adversarial attacks do not rely on the entire image uniformly but instead depend on a small subset of critical image tokens. Based on this insight, we propose Gradient Token Masking (GTM), which localizes these tokens via gradient analysis and neutralizes them through masking. We find that attribution based"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.25194","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-24T17:51:34Z","cross_cats_sorted":[],"title_canon_sha256":"2ed52de81a668684e212e7a913065815c7160dbea57217be80438bd9ffa5686c","abstract_canon_sha256":"2b3db5e31cff2650cd498eac7ff03353a87f579cd1a6046ce5b8bc447b9fa9ad"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:04:22.542079Z","signature_b64":"Qp4einvDe5/VjEmYZmvQOF6haasAkzq/k5u9FslfdvcBFkTRNCpXD8JMFlLdT0HiN/wZJT0QB9bR2usMU4d0AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dc248151ca26964218a929599c20b158a4af01a3cc4cebfc5d980b742aa4abd7","last_reissued_at":"2026-05-26T02:04:22.541223Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:04:22.541223Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Localization then Neutralization: Gradient-guided Token Suppression against Visual Prompt Injection Attack","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Dongpeng Zhang, Gaozheng Pei, Ke Ma, Longtao Huang, Qianqian Xu, Qingming Huang, Yangbangyan Jiang","submitted_at":"2026-05-24T17:51:34Z","abstract_excerpt":"Adversarial images pose a severe security threat to multimodal large language models through prompt injection. Existing defenses largely lack a principled understanding of the underlying mechanisms and struggle to balance efficiency and defense utility. In this work, we show that successful adversarial attacks do not rely on the entire image uniformly but instead depend on a small subset of critical image tokens. Based on this insight, we propose Gradient Token Masking (GTM), which localizes these tokens via gradient analysis and neutralizes them through masking. We find that attribution based"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.25194","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.25194/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.25194","created_at":"2026-05-26T02:04:22.541378+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.25194v1","created_at":"2026-05-26T02:04:22.541378+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.25194","created_at":"2026-05-26T02:04:22.541378+00:00"},{"alias_kind":"pith_short_12","alias_value":"3QSICUOKE2LE","created_at":"2026-05-26T02:04:22.541378+00:00"},{"alias_kind":"pith_short_16","alias_value":"3QSICUOKE2LEEGFJ","created_at":"2026-05-26T02:04:22.541378+00:00"},{"alias_kind":"pith_short_8","alias_value":"3QSICUOK","created_at":"2026-05-26T02:04:22.541378+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC","json":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC.json","graph_json":"https://pith.science/api/pith-number/3QSICUOKE2LEEGFJFFMZYIFRLC/graph.json","events_json":"https://pith.science/api/pith-number/3QSICUOKE2LEEGFJFFMZYIFRLC/events.json","paper":"https://pith.science/paper/3QSICUOK"},"agent_actions":{"view_html":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC","download_json":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC.json","view_paper":"https://pith.science/paper/3QSICUOK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.25194&json=true","fetch_graph":"https://pith.science/api/pith-number/3QSICUOKE2LEEGFJFFMZYIFRLC/graph.json","fetch_events":"https://pith.science/api/pith-number/3QSICUOKE2LEEGFJFFMZYIFRLC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC/action/storage_attestation","attest_author":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC/action/author_attestation","sign_citation":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC/action/citation_signature","submit_replication":"https://pith.science/pith/3QSICUOKE2LEEGFJFFMZYIFRLC/action/replication_record"}},"created_at":"2026-05-26T02:04:22.541378+00:00","updated_at":"2026-05-26T02:04:22.541378+00:00"}