{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:36WNU6U676CSZZEBR6MNRJUWU2","short_pith_number":"pith:36WNU6U6","schema_version":"1.0","canonical_sha256":"dfacda7a9eff852ce4818f98d8a696a6a4d70196e61f8ee24c9a6a421706d2c6","source":{"kind":"arxiv","id":"2510.11195","version":2},"attestation_state":"computed","paper":{"title":"RAG-Pull: Turning Retrieval into a Code-Injection Channel via Invisible Unicode Perturbations","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Aritra Dhar, Lukas Cavigelli, Vasilije Stambolic","submitted_at":"2025-10-13T09:27:26Z","abstract_excerpt":"Retrieval-Augmented Generation (RAG) increases the reliability and trustworthiness of the LLM response and reduces hallucination by eliminating the need for model retraining. It does so by adding external data into the LLM's context. We develop a new class of black-box attack, RAG-Pull, that inserts hidden UTF characters into queries or external code repositories, redirecting retrieval toward malicious code, thereby breaking the models' safety alignment. We observe that query and code perturbations alone can shift retrieval toward attacker-controlled snippets, while combined query-and-target p"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.11195","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CR","submitted_at":"2025-10-13T09:27:26Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"11a5c2684b283eee21095e3f010e9d6c2f70133c8c4deabfabc4df8d0e43de37","abstract_canon_sha256":"9ab5bc561215a6639c17741f94c96e916f85bf522d3d9b7396ca56168684398b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:08.137001Z","signature_b64":"mJlJ7vvZkd10XOXaMdPimcWS3fzP5tAFj1rL3jPx04Rbm9BwrQhI9CX1xXCwi8oUhDyTYlgVqvfeEqDbEodzCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dfacda7a9eff852ce4818f98d8a696a6a4d70196e61f8ee24c9a6a421706d2c6","last_reissued_at":"2026-05-25T02:01:08.136012Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:08.136012Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RAG-Pull: Turning Retrieval into a Code-Injection Channel via Invisible Unicode Perturbations","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Aritra Dhar, Lukas Cavigelli, Vasilije Stambolic","submitted_at":"2025-10-13T09:27:26Z","abstract_excerpt":"Retrieval-Augmented Generation (RAG) increases the reliability and trustworthiness of the LLM response and reduces hallucination by eliminating the need for model retraining. It does so by adding external data into the LLM's context. We develop a new class of black-box attack, RAG-Pull, that inserts hidden UTF characters into queries or external code repositories, redirecting retrieval toward malicious code, thereby breaking the models' safety alignment. We observe that query and code perturbations alone can shift retrieval toward attacker-controlled snippets, while combined query-and-target p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.11195","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.11195/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.11195","created_at":"2026-05-25T02:01:08.136110+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.11195v2","created_at":"2026-05-25T02:01:08.136110+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.11195","created_at":"2026-05-25T02:01:08.136110+00:00"},{"alias_kind":"pith_short_12","alias_value":"36WNU6U676CS","created_at":"2026-05-25T02:01:08.136110+00:00"},{"alias_kind":"pith_short_16","alias_value":"36WNU6U676CSZZEB","created_at":"2026-05-25T02:01:08.136110+00:00"},{"alias_kind":"pith_short_8","alias_value":"36WNU6U6","created_at":"2026-05-25T02:01:08.136110+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2604.08304","citing_title":"Securing Retrieval-Augmented Generation: A Taxonomy of Attacks, Defenses, and Future Directions","ref_index":16,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2","json":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2.json","graph_json":"https://pith.science/api/pith-number/36WNU6U676CSZZEBR6MNRJUWU2/graph.json","events_json":"https://pith.science/api/pith-number/36WNU6U676CSZZEBR6MNRJUWU2/events.json","paper":"https://pith.science/paper/36WNU6U6"},"agent_actions":{"view_html":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2","download_json":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2.json","view_paper":"https://pith.science/paper/36WNU6U6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.11195&json=true","fetch_graph":"https://pith.science/api/pith-number/36WNU6U676CSZZEBR6MNRJUWU2/graph.json","fetch_events":"https://pith.science/api/pith-number/36WNU6U676CSZZEBR6MNRJUWU2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2/action/storage_attestation","attest_author":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2/action/author_attestation","sign_citation":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2/action/citation_signature","submit_replication":"https://pith.science/pith/36WNU6U676CSZZEBR6MNRJUWU2/action/replication_record"}},"created_at":"2026-05-25T02:01:08.136110+00:00","updated_at":"2026-05-25T02:01:08.136110+00:00"}