{"work":{"id":"a7c5b6ec-3407-4330-96c8-3fc58e7d410b","openalex_id":null,"doi":null,"arxiv_id":"2211.09527","raw_key":null,"title":"Ignore Previous Prompt: Attack Techniques For Language Models","authors":null,"authors_text":"F\\'abio Perez, Ian Ribeiro","year":2022,"venue":"cs.CL","abstract":"Transformer-based large language models (LLMs) provide a powerful foundation for natural language tasks in large-scale customer-facing applications. However, studies that explore their vulnerabilities emerging from malicious user interaction are scarce. By proposing PromptInject, a prosaic alignment framework for mask-based iterative adversarial prompt composition, we examine how GPT-3, the most widely deployed language model in production, can be easily misaligned by simple handcrafted inputs. In particular, we investigate two types of attacks -- goal hijacking and prompt leaking -- and demonstrate that even low-aptitude, but sufficiently ill-intentioned agents, can easily exploit GPT-3's stochastic nature, creating long-tail risks. The code for PromptInject is available at https://github.com/agencyenterprise/PromptInject.","external_url":"https://arxiv.org/abs/2211.09527","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-06-29T11:23:21.350940+00:00","pith_arxiv_id":"2211.09527","created_at":"2026-05-08T23:19:29.186449+00:00","updated_at":"2026-06-29T11:23:21.350940+00:00","title_quality_ok":true,"display_title":"Ignore Previous Prompt: Attack Techniques For Language Models","render_title":"Ignore Previous Prompt: Attack Techniques For Language Models"},"hub":{"state":{"work_id":"a7c5b6ec-3407-4330-96c8-3fc58e7d410b","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":85,"external_cited_by_count":null,"distinct_field_count":8,"first_pith_cited_at":"2023-03-14T17:47:09+00:00","last_pith_cited_at":"2026-06-26T01:12:02+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-29T12:38:46.462114+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":23},{"context_role":"method","n":2},{"context_role":"baseline","n":1}],"polarity_counts":[{"context_polarity":"background","n":21},{"context_polarity":"use_method","n":2},{"context_polarity":"baseline","n":1},{"context_polarity":"support","n":1},{"context_polarity":"unclear","n":1}],"runs":{"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T14:21:50.554176+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Universal and Transferable Adversarial Attacks on Aligned Language Models","work_id":"3322fa86-1768-4677-8425-dd326b45e078","shared_citers":19},{"title":"Prompt Injection attack against LLM-integrated Applications","work_id":"977b4683-bba6-49d6-8f3d-496c41cb7fac","shared_citers":13},{"title":"Not what you've signed up for: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection","work_id":"7a8cfce1-ada7-4a7a-8516-6f16b1bd077b","shared_citers":11},{"title":"Defending Against Indirect Prompt Injection Attacks With Spotlighting","work_id":"c18cd975-e731-4e0f-a99f-a37d846cdd31","shared_citers":7},{"title":"The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions","work_id":"ba941a96-eb3b-48c0-b52c-5e9463085190","shared_citers":7},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":7},{"title":"Benchmarking and Defending Against Indirect Prompt Injection Attacks on Large Language Models","work_id":"0a458c42-fb17-4655-82ad-c93057550c76","shared_citers":6},{"title":"Defeating Prompt Injections by Design","work_id":"86405b86-1c51-4042-9b04-aff0b6541411","shared_citers":6},{"title":"Llama Guard: LLM-based Input-Output Safeguard for Human-AI Conversations","work_id":"93844332-869b-448c-a1be-35466150b1b2","shared_citers":6},{"title":"ReAct: Synergizing Reasoning and Acting in Language Models","work_id":"407a2351-25f1-497d-b611-f77d0292a8e6","shared_citers":6},{"title":"AutoDAN: Generating Stealthy Jailbreak Prompts on Aligned Large Language Models","work_id":"3b676de6-edef-4976-a8b5-082d4ff50867","shared_citers":5},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":5},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":5},{"title":"Tensor Trust: Interpretable Prompt Injection Attacks from an Online Game","work_id":"2c485010-a490-4283-8a00-55d8996962a4","shared_citers":5},{"title":"ToolLLM: Facilitating Large Language Models to Master 16000+ Real-world APIs","work_id":"3c555b48-a4d9-42dd-9fdd-0f6018fbe9cb","shared_citers":5},{"title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback","work_id":"a1f2574b-a899-4713-be60-c87ba332656c","shared_citers":5},{"title":"AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents","work_id":"7b1b672f-e6b4-4df9-aa8b-3396a2eb8b16","shared_citers":4},{"title":"Baseline Defenses for Adversarial Attacks Against Aligned Language Models","work_id":"db5870ca-177b-4d1d-a08d-ee5ceab17fe3","shared_citers":4},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":4},{"title":"InjecAgent: Benchmarking Indirect Prompt Injections in Tool-Integrated Large Language Model Agents","work_id":"5cbfcda4-ec26-44e4-be60-e1525956d71d","shared_citers":4},{"title":"Jailbreak attacks and defenses against large language models: A survey","work_id":"0ee7fc45-ae61-432b-83ac-f1d93ccd88fb","shared_citers":4},{"title":"ACE: A Security Architecture for LLM-Integrated App Systems","work_id":"efb277a2-ba20-4b62-8693-0df65a92aebe","shared_citers":3},{"title":"AgentBench: Evaluating LLMs as Agents","work_id":"a37549b4-4c94-412d-acc4-4efeb08509be","shared_citers":3},{"title":"arXiv preprint arXiv:2311.17035 , year=","work_id":"7ee4de98-0bdd-47ab-abe6-1865cb65b1ae","shared_citers":3}],"time_series":[{"n":2,"year":2023},{"n":2,"year":2024},{"n":43,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T14:21:53.952792+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T14:21:44.175671+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Ignore Previous Prompt: Attack Techniques For Language Models","claims":[{"claim_text":"Transformer-based large language models (LLMs) provide a powerful foundation for natural language tasks in large-scale customer-facing applications. However, studies that explore their vulnerabilities emerging from malicious user interaction are scarce. By proposing PromptInject, a prosaic alignment framework for mask-based iterative adversarial prompt composition, we examine how GPT-3, the most widely deployed language model in production, can be easily misaligned by simple handcrafted inputs. In particular, we investigate two types of attacks -- goal hijacking and prompt leaking -- and demon","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Ignore Previous Prompt: Attack Techniques For Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T14:21:53.957416+00:00"}},"summary":{"title":"Ignore Previous Prompt: Attack Techniques For Language Models","claims":[{"claim_text":"Transformer-based large language models (LLMs) provide a powerful foundation for natural language tasks in large-scale customer-facing applications. However, studies that explore their vulnerabilities emerging from malicious user interaction are scarce. By proposing PromptInject, a prosaic alignment framework for mask-based iterative adversarial prompt composition, we examine how GPT-3, the most widely deployed language model in production, can be easily misaligned by simple handcrafted inputs. In particular, we investigate two types of attacks -- goal hijacking and prompt leaking -- and demon","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Ignore Previous Prompt: Attack Techniques For Language Models because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Universal and Transferable Adversarial Attacks on Aligned Language Models","work_id":"3322fa86-1768-4677-8425-dd326b45e078","shared_citers":19},{"title":"Prompt Injection attack against LLM-integrated Applications","work_id":"977b4683-bba6-49d6-8f3d-496c41cb7fac","shared_citers":13},{"title":"Not what you've signed up for: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection","work_id":"7a8cfce1-ada7-4a7a-8516-6f16b1bd077b","shared_citers":11},{"title":"Defending Against Indirect Prompt Injection Attacks With Spotlighting","work_id":"c18cd975-e731-4e0f-a99f-a37d846cdd31","shared_citers":7},{"title":"The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions","work_id":"ba941a96-eb3b-48c0-b52c-5e9463085190","shared_citers":7},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":7},{"title":"Benchmarking and Defending Against Indirect Prompt Injection Attacks on Large Language Models","work_id":"0a458c42-fb17-4655-82ad-c93057550c76","shared_citers":6},{"title":"Defeating Prompt Injections by Design","work_id":"86405b86-1c51-4042-9b04-aff0b6541411","shared_citers":6},{"title":"Llama Guard: LLM-based Input-Output Safeguard for Human-AI Conversations","work_id":"93844332-869b-448c-a1be-35466150b1b2","shared_citers":6},{"title":"ReAct: Synergizing Reasoning and Acting in Language Models","work_id":"407a2351-25f1-497d-b611-f77d0292a8e6","shared_citers":6},{"title":"AutoDAN: Generating Stealthy Jailbreak Prompts on Aligned Large Language Models","work_id":"3b676de6-edef-4976-a8b5-082d4ff50867","shared_citers":5},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":5},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":5},{"title":"Tensor Trust: Interpretable Prompt Injection Attacks from an Online Game","work_id":"2c485010-a490-4283-8a00-55d8996962a4","shared_citers":5},{"title":"ToolLLM: Facilitating Large Language Models to Master 16000+ Real-world APIs","work_id":"3c555b48-a4d9-42dd-9fdd-0f6018fbe9cb","shared_citers":5},{"title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback","work_id":"a1f2574b-a899-4713-be60-c87ba332656c","shared_citers":5},{"title":"AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents","work_id":"7b1b672f-e6b4-4df9-aa8b-3396a2eb8b16","shared_citers":4},{"title":"Baseline Defenses for Adversarial Attacks Against Aligned Language Models","work_id":"db5870ca-177b-4d1d-a08d-ee5ceab17fe3","shared_citers":4},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":4},{"title":"InjecAgent: Benchmarking Indirect Prompt Injections in Tool-Integrated Large Language Model Agents","work_id":"5cbfcda4-ec26-44e4-be60-e1525956d71d","shared_citers":4},{"title":"Jailbreak attacks and defenses against large language models: A survey","work_id":"0ee7fc45-ae61-432b-83ac-f1d93ccd88fb","shared_citers":4},{"title":"ACE: A Security Architecture for LLM-Integrated App Systems","work_id":"efb277a2-ba20-4b62-8693-0df65a92aebe","shared_citers":3},{"title":"AgentBench: Evaluating LLMs as Agents","work_id":"a37549b4-4c94-412d-acc4-4efeb08509be","shared_citers":3},{"title":"arXiv preprint arXiv:2311.17035 , year=","work_id":"7ee4de98-0bdd-47ab-abe6-1865cb65b1ae","shared_citers":3}],"time_series":[{"n":2,"year":2023},{"n":2,"year":2024},{"n":43,"year":2026}],"dependency_candidates":[]},"authors":[]}}