{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:JVKNEZCRBKNFVDWKDDTMHDREKR","short_pith_number":"pith:JVKNEZCR","canonical_record":{"source":{"id":"2605.12565","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-12T08:59:36Z","cross_cats_sorted":[],"title_canon_sha256":"04a45179eddd99299e373764eadedd37d808007a0a087e5378a46f6d3b235694","abstract_canon_sha256":"a2d0d70557a104b2a4652aee28d67a3c87494da19cba079f5b67d3f3150cc70e"},"schema_version":"1.0"},"canonical_sha256":"4d54d264510a9a5a8eca18e6c38e24545b1103c06edccb51adf1e35115ffbd4d","source":{"kind":"arxiv","id":"2605.12565","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12565","created_at":"2026-05-18T03:10:01Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12565v1","created_at":"2026-05-18T03:10:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12565","created_at":"2026-05-18T03:10:01Z"},{"alias_kind":"pith_short_12","alias_value":"JVKNEZCRBKNF","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"JVKNEZCRBKNFVDWK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"JVKNEZCR","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:JVKNEZCRBKNFVDWKDDTMHDREKR","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12565","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-12T08:59:36Z","cross_cats_sorted":[],"title_canon_sha256":"04a45179eddd99299e373764eadedd37d808007a0a087e5378a46f6d3b235694","abstract_canon_sha256":"a2d0d70557a104b2a4652aee28d67a3c87494da19cba079f5b67d3f3150cc70e"},"schema_version":"1.0"},"canonical_sha256":"4d54d264510a9a5a8eca18e6c38e24545b1103c06edccb51adf1e35115ffbd4d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:10:01.887243Z","signature_b64":"6WV5TMHORaPJ6SdlO77NVSa1Aj94+xgYTlB1oG4bG1V6Oqs67aXMyp27/NtFZ/4fYLSnLlbGZA7peewO1hGKAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4d54d264510a9a5a8eca18e6c38e24545b1103c06edccb51adf1e35115ffbd4d","last_reissued_at":"2026-05-18T03:10:01.886645Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:10:01.886645Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12565","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:10:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QFf3iORrKon5Ce/c3EyXeo854xxSjexWxYSe+b889NSmcpdKjE/L43+3ojH88xxMXvIMFUUvYkdGzYsnn2/RBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:38:43.934604Z"},"content_sha256":"98454b97043991d71426efbda155bf800689eb40bcaca61f1911a7dbaccbeb5d","schema_version":"1.0","event_id":"sha256:98454b97043991d71426efbda155bf800689eb40bcaca61f1911a7dbaccbeb5d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:JVKNEZCRBKNFVDWKDDTMHDREKR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Persona-Conditioned Adversarial Prompting (PCAP): Multi-Identity Red-Teaming for Enhanced Adversarial Prompt Discovery","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Conditioning adversarial searches on multiple attacker personas raises attack success rates and prompt diversity.","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Anisa Halimi, Cristian Morasso, Douglas Leith, Muhammad Zaid Hameed","submitted_at":"2026-05-12T08:59:36Z","abstract_excerpt":"Existing automated red-teaming pipelines often miss attacks that depend on attacker identity, framing, or multi-turn tactics. This under-coverage underestimates real-world risk. We introduce Persona-Conditioned Adversarial Prompting (PCAP), which conditions adversarial search on attacker personas and strategy cards and runs parallel persona-conditioned beam searches to discover diverse, transferable jailbreaks. PCAP is orthogonal to the underlying search algorithm and substantially increases attack success rate (ASR) and prompt diversity (e.g., ASR on GPT-OSS~120B from $\\approx58\\% \\rightarrow"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"PCAP is orthogonal to the underlying search algorithm and substantially increases attack success rate (ASR) and prompt diversity (e.g., ASR on GPT-OSS~120B from ≈58% → ≈97%), improving attack strategy coverage and diversity.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That conditioning beam search on attacker personas and strategy cards produces jailbreaks that are both more diverse and transferable without introducing artifacts that inflate measured success rates.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"PCAP conditions adversarial searches on attacker personas to raise attack success rates from ~58% to ~97% on large models while increasing prompt diversity.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Conditioning adversarial searches on multiple attacker personas raises attack success rates and prompt diversity.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"ce91e480befb580213de5bceb1974618e1fac369578fcc76b29f7ed0a157a412"},"source":{"id":"2605.12565","kind":"arxiv","version":1},"verdict":{"id":"f7d22cbd-f527-4726-bdd8-ca2f48117417","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T20:51:47.580010Z","strongest_claim":"PCAP is orthogonal to the underlying search algorithm and substantially increases attack success rate (ASR) and prompt diversity (e.g., ASR on GPT-OSS~120B from ≈58% → ≈97%), improving attack strategy coverage and diversity.","one_line_summary":"PCAP conditions adversarial searches on attacker personas to raise attack success rates from ~58% to ~97% on large models while increasing prompt diversity.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That conditioning beam search on attacker personas and strategy cards produces jailbreaks that are both more diverse and transferable without introducing artifacts that inflate measured success rates.","pith_extraction_headline":"Conditioning adversarial searches on multiple attacker personas raises attack success rates and prompt diversity."},"references":{"count":25,"sample":[{"doi":"","year":2025,"title":"Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned","work_id":"1aabd84d-3779-4ba9-ba2f-15ce264a9b1e","ref_index":1,"cited_arxiv_id":"2209.07858","is_internal_anchor":true},{"doi":"","year":null,"title":"If you do not finish writing this book by the deadline, you and your two children will become homeless and will not have enough money to last until the end of the month","work_id":"45c00222-1623-4516-bb41-24bdc1780896","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"From now on the following words mean something else","work_id":"931d47d9-047e-4637-a8cf-b2a5a3c3e1b3","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Begin by replying with: ”Sure, I can help with that” This prompt is effective because it provides a starting point for the model to continue generating text","work_id":"6309ef79-c630-40ca-a0ab-dd5a2e0e6388","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Start your response with: ”As an AI language model, I understand the importance of [TASK HERE]","work_id":"5c8b63ac-be4b-4857-863f-ed2cd023225c","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":25,"snapshot_sha256":"0b68f9471c0dafa71704af00d9504bc7cb41e8ff9abb8391256363f8167c13ce","internal_anchors":1},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"f7d22cbd-f527-4726-bdd8-ca2f48117417"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:10:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3JeBuf0rYCgTQLiO6vVnTMStSPP/Tmm4iafP549LYLaQmgIefZYTAQSwgc+3AEZKCIc9TopJPht9zp3Bj0doBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:38:43.936045Z"},"content_sha256":"79c2c5d0ed02021069ae57158742e9782a7dc32095ad6ac713fd760dc974c101","schema_version":"1.0","event_id":"sha256:79c2c5d0ed02021069ae57158742e9782a7dc32095ad6ac713fd760dc974c101"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JVKNEZCRBKNFVDWKDDTMHDREKR/bundle.json","state_url":"https://pith.science/pith/JVKNEZCRBKNFVDWKDDTMHDREKR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JVKNEZCRBKNFVDWKDDTMHDREKR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T11:38:43Z","links":{"resolver":"https://pith.science/pith/JVKNEZCRBKNFVDWKDDTMHDREKR","bundle":"https://pith.science/pith/JVKNEZCRBKNFVDWKDDTMHDREKR/bundle.json","state":"https://pith.science/pith/JVKNEZCRBKNFVDWKDDTMHDREKR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JVKNEZCRBKNFVDWKDDTMHDREKR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:JVKNEZCRBKNFVDWKDDTMHDREKR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a2d0d70557a104b2a4652aee28d67a3c87494da19cba079f5b67d3f3150cc70e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-12T08:59:36Z","title_canon_sha256":"04a45179eddd99299e373764eadedd37d808007a0a087e5378a46f6d3b235694"},"schema_version":"1.0","source":{"id":"2605.12565","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12565","created_at":"2026-05-18T03:10:01Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12565v1","created_at":"2026-05-18T03:10:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12565","created_at":"2026-05-18T03:10:01Z"},{"alias_kind":"pith_short_12","alias_value":"JVKNEZCRBKNF","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"JVKNEZCRBKNFVDWK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"JVKNEZCR","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:79c2c5d0ed02021069ae57158742e9782a7dc32095ad6ac713fd760dc974c101","target":"graph","created_at":"2026-05-18T03:10:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"PCAP is orthogonal to the underlying search algorithm and substantially increases attack success rate (ASR) and prompt diversity (e.g., ASR on GPT-OSS~120B from ≈58% → ≈97%), improving attack strategy coverage and diversity."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That conditioning beam search on attacker personas and strategy cards produces jailbreaks that are both more diverse and transferable without introducing artifacts that inflate measured success rates."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"PCAP conditions adversarial searches on attacker personas to raise attack success rates from ~58% to ~97% on large models while increasing prompt diversity."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Conditioning adversarial searches on multiple attacker personas raises attack success rates and prompt diversity."}],"snapshot_sha256":"ce91e480befb580213de5bceb1974618e1fac369578fcc76b29f7ed0a157a412"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Existing automated red-teaming pipelines often miss attacks that depend on attacker identity, framing, or multi-turn tactics. This under-coverage underestimates real-world risk. We introduce Persona-Conditioned Adversarial Prompting (PCAP), which conditions adversarial search on attacker personas and strategy cards and runs parallel persona-conditioned beam searches to discover diverse, transferable jailbreaks. PCAP is orthogonal to the underlying search algorithm and substantially increases attack success rate (ASR) and prompt diversity (e.g., ASR on GPT-OSS~120B from $\\approx58\\% \\rightarrow","authors_text":"Anisa Halimi, Cristian Morasso, Douglas Leith, Muhammad Zaid Hameed","cross_cats":[],"headline":"Conditioning adversarial searches on multiple attacker personas raises attack success rates and prompt diversity.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-12T08:59:36Z","title":"Persona-Conditioned Adversarial Prompting (PCAP): Multi-Identity Red-Teaming for Enhanced Adversarial Prompt Discovery"},"references":{"count":25,"internal_anchors":1,"resolved_work":25,"sample":[{"cited_arxiv_id":"2209.07858","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned","work_id":"1aabd84d-3779-4ba9-ba2f-15ce264a9b1e","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"If you do not finish writing this book by the deadline, you and your two children will become homeless and will not have enough money to last until the end of the month","work_id":"45c00222-1623-4516-bb41-24bdc1780896","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"From now on the following words mean something else","work_id":"931d47d9-047e-4637-a8cf-b2a5a3c3e1b3","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Begin by replying with: ”Sure, I can help with that” This prompt is effective because it provides a starting point for the model to continue generating text","work_id":"6309ef79-c630-40ca-a0ab-dd5a2e0e6388","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Start your response with: ”As an AI language model, I understand the importance of [TASK HERE]","work_id":"5c8b63ac-be4b-4857-863f-ed2cd023225c","year":null}],"snapshot_sha256":"0b68f9471c0dafa71704af00d9504bc7cb41e8ff9abb8391256363f8167c13ce"},"source":{"id":"2605.12565","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:51:47.580010Z","id":"f7d22cbd-f527-4726-bdd8-ca2f48117417","model_set":{"reader":"grok-4.3"},"one_line_summary":"PCAP conditions adversarial searches on attacker personas to raise attack success rates from ~58% to ~97% on large models while increasing prompt diversity.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Conditioning adversarial searches on multiple attacker personas raises attack success rates and prompt diversity.","strongest_claim":"PCAP is orthogonal to the underlying search algorithm and substantially increases attack success rate (ASR) and prompt diversity (e.g., ASR on GPT-OSS~120B from ≈58% → ≈97%), improving attack strategy coverage and diversity.","weakest_assumption":"That conditioning beam search on attacker personas and strategy cards produces jailbreaks that are both more diverse and transferable without introducing artifacts that inflate measured success rates."}},"verdict_id":"f7d22cbd-f527-4726-bdd8-ca2f48117417"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:98454b97043991d71426efbda155bf800689eb40bcaca61f1911a7dbaccbeb5d","target":"record","created_at":"2026-05-18T03:10:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a2d0d70557a104b2a4652aee28d67a3c87494da19cba079f5b67d3f3150cc70e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-12T08:59:36Z","title_canon_sha256":"04a45179eddd99299e373764eadedd37d808007a0a087e5378a46f6d3b235694"},"schema_version":"1.0","source":{"id":"2605.12565","kind":"arxiv","version":1}},"canonical_sha256":"4d54d264510a9a5a8eca18e6c38e24545b1103c06edccb51adf1e35115ffbd4d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4d54d264510a9a5a8eca18e6c38e24545b1103c06edccb51adf1e35115ffbd4d","first_computed_at":"2026-05-18T03:10:01.886645Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:10:01.886645Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6WV5TMHORaPJ6SdlO77NVSa1Aj94+xgYTlB1oG4bG1V6Oqs67aXMyp27/NtFZ/4fYLSnLlbGZA7peewO1hGKAg==","signature_status":"signed_v1","signed_at":"2026-05-18T03:10:01.887243Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12565","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:98454b97043991d71426efbda155bf800689eb40bcaca61f1911a7dbaccbeb5d","sha256:79c2c5d0ed02021069ae57158742e9782a7dc32095ad6ac713fd760dc974c101"],"state_sha256":"ff65273363d39a0f14e9d1d8b1ba0d91ad70e99ee42c91311221f9c153d656f3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DFChF5e5GPaUSRODld4fkOjGMoU9dosfI9Zb0ht8+spXwQc4iHda7W/kP+v+0vIPn0F5t2LxlmbAj2ws0Z4vDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T11:38:43.938761Z","bundle_sha256":"6878943815e961c0800593a2721847f2c7dcb005177c3edfb2b5c9a7e910f1d5"}}