{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:5FX652FT63W3O5443F5O5QKAVD","short_pith_number":"pith:5FX652FT","schema_version":"1.0","canonical_sha256":"e96feee8b3f6edb7779cd97aeec140a8c80234865aa118c2cf11f99d214cd6ae","source":{"kind":"arxiv","id":"2311.03191","version":5},"attestation_state":"computed","paper":{"title":"DeepInception: Hypnotize Large Language Model to Be Jailbreaker","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CR"],"primary_cat":"cs.LG","authors_text":"Bo Han, Jiangchao Yao, Jianing Zhu, Tongliang Liu, Xuan Li, Zhanke Zhou","submitted_at":"2023-11-06T15:29:30Z","abstract_excerpt":"Large language models (LLMs) have succeeded significantly in various applications but remain susceptible to adversarial jailbreaks that void their safety guardrails. Previous attempts to exploit these vulnerabilities often rely on high-cost computational extrapolations, which may not be practical or efficient. In this paper, inspired by the authority influence demonstrated in the Milgram experiment, we present a lightweight method to take advantage of the LLMs' personification capabilities to construct $\\textit{a virtual, nested scene}$, allowing it to realize an adaptive way to escape the usa"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2311.03191","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-11-06T15:29:30Z","cross_cats_sorted":["cs.CR"],"title_canon_sha256":"ee7a3a858052d62513485b97e67a39aca8e20d34d7f006c721dce209903683c6","abstract_canon_sha256":"f05221af55f00fa350d2a5aad3a0e8933cf7e19c8072c945a5c7bbc5d43d5440"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-19T10:31:15.840498Z","signature_b64":"if0VL2+XJPUHmGbAXj80TEHB6teIWPeUfeBdWB4YaZn77g75Pz/kPlpkimz3LthKInmEXkuhS9WSi2RjWr+lBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e96feee8b3f6edb7779cd97aeec140a8c80234865aa118c2cf11f99d214cd6ae","last_reissued_at":"2026-05-19T10:31:15.838754Z","signature_status":"signed_v1","first_computed_at":"2026-05-19T10:31:15.838754Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"DeepInception: Hypnotize Large Language Model to Be Jailbreaker","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CR"],"primary_cat":"cs.LG","authors_text":"Bo Han, Jiangchao Yao, Jianing Zhu, Tongliang Liu, Xuan Li, Zhanke Zhou","submitted_at":"2023-11-06T15:29:30Z","abstract_excerpt":"Large language models (LLMs) have succeeded significantly in various applications but remain susceptible to adversarial jailbreaks that void their safety guardrails. Previous attempts to exploit these vulnerabilities often rely on high-cost computational extrapolations, which may not be practical or efficient. In this paper, inspired by the authority influence demonstrated in the Milgram experiment, we present a lightweight method to take advantage of the LLMs' personification capabilities to construct $\\textit{a virtual, nested scene}$, allowing it to realize an adaptive way to escape the usa"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2311.03191","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2311.03191/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2311.03191","created_at":"2026-05-19T10:31:15.838832+00:00"},{"alias_kind":"arxiv_version","alias_value":"2311.03191v5","created_at":"2026-05-19T10:31:15.838832+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2311.03191","created_at":"2026-05-19T10:31:15.838832+00:00"},{"alias_kind":"pith_short_12","alias_value":"5FX652FT63W3","created_at":"2026-05-19T10:31:15.838832+00:00"},{"alias_kind":"pith_short_16","alias_value":"5FX652FT63W3O544","created_at":"2026-05-19T10:31:15.838832+00:00"},{"alias_kind":"pith_short_8","alias_value":"5FX652FT","created_at":"2026-05-19T10:31:15.838832+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":18,"internal_anchor_count":18,"sample":[{"citing_arxiv_id":"2506.12382","citing_title":"Exploring the Secondary Risks of Large Language Models","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2508.04204","citing_title":"ReasoningGuard: Safeguarding Large Reasoning Models with Inference-time Safety Aha Moments","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2510.08592","citing_title":"Less Diverse, Less Safe: The Indirect But Pervasive Risk of Test-Time Scaling in Large Language Models","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2510.10073","citing_title":"SecureWebArena: A Holistic Security Evaluation Benchmark for LVLM-based Web Agents","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2510.20129","citing_title":"SAID: Safety-Aware Intent Defense via Prefix Probing for Large Language Models","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2511.02356","citing_title":"ASTRA: An Automated Framework for Strategy Discovery, Retrieval, and Evolution for Jailbreaking LLMs","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2512.22753","citing_title":"From Rookie to Expert: Manipulating LLMs for Automated Vulnerability Exploitation in Enterprise Software","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2407.04295","citing_title":"Jailbreak Attacks and Defenses Against Large Language Models: A Survey","ref_index":52,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04060","citing_title":"CoopGuard: Stateful Cooperative Agents Safeguarding LLMs Against Evolving Multi-Round Attacks","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2309.12284","citing_title":"MetaMath: Bootstrap Your Own Mathematical Questions for Large Language Models","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2604.23341","citing_title":"Evaluating Jailbreaking Vulnerabilities in LLMs Deployed as Assistants for Smart Grid Operations: A Benchmark Against NERC Standards","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05058","citing_title":"SoK: Robustness in Large Language Models against Jailbreak Attacks","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2604.10326","citing_title":"Jailbreaking the Matrix: Nullspace Steering for Controlled Model Subversion","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12817","citing_title":"Understanding and Improving Continuous Adversarial Training for LLMs via In-context Learning Theory","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07727","citing_title":"TrajGuard: Streaming Hidden-state Trajectory Detection for Decoding-time Jailbreak Defense","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.09222","citing_title":"GRM: Utility-Aware Jailbreak Attacks on Audio LLMs via Gradient-Ratio Masking","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14604","citing_title":"Hijacking Large Audio-Language Models via Context-Agnostic and Imperceptible Auditory Prompt Injection","ref_index":52,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14548","citing_title":"VoxSafeBench: Not Just What Is Said, but Who, How, and Where","ref_index":53,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD","json":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD.json","graph_json":"https://pith.science/api/pith-number/5FX652FT63W3O5443F5O5QKAVD/graph.json","events_json":"https://pith.science/api/pith-number/5FX652FT63W3O5443F5O5QKAVD/events.json","paper":"https://pith.science/paper/5FX652FT"},"agent_actions":{"view_html":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD","download_json":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD.json","view_paper":"https://pith.science/paper/5FX652FT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2311.03191&json=true","fetch_graph":"https://pith.science/api/pith-number/5FX652FT63W3O5443F5O5QKAVD/graph.json","fetch_events":"https://pith.science/api/pith-number/5FX652FT63W3O5443F5O5QKAVD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD/action/storage_attestation","attest_author":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD/action/author_attestation","sign_citation":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD/action/citation_signature","submit_replication":"https://pith.science/pith/5FX652FT63W3O5443F5O5QKAVD/action/replication_record"}},"created_at":"2026-05-19T10:31:15.838832+00:00","updated_at":"2026-05-19T10:31:15.838832+00:00"}