{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BV5INSAKZCOC4HDWOVTUZ35OJ2","short_pith_number":"pith:BV5INSAK","schema_version":"1.0","canonical_sha256":"0d7a86c80ac89c2e1c7675674cefae4e8e32bd4585ec730df12ea994b8af1ff5","source":{"kind":"arxiv","id":"2601.10338","version":1},"attestation_state":"computed","paper":{"title":"Agent Skills in the Wild: An Empirical Study of Security Vulnerabilities at Scale","license":"http://creativecommons.org/licenses/by/4.0/","headline":"More than one in four AI agent skills contain at least one security vulnerability.","cross_cats":["cs.AI","cs.CL","cs.SE"],"primary_cat":"cs.CR","authors_text":"Gelei Deng, Guangquan Xu, Leo Zhang, Ruitao Feng, Weizhe Wang, Yao Zhang, Yi Liu, Yuekang Li","submitted_at":"2026-01-15T12:31:52Z","abstract_excerpt":"The rise of AI agent frameworks has introduced agent skills, modular packages containing instructions and executable code that dynamically extend agent capabilities. While this architecture enables powerful customization, skills execute with implicit trust and minimal vetting, creating a significant yet uncharacterized attack surface. We conduct the first large-scale empirical security analysis of this emerging ecosystem, collecting 42,447 skills from two major marketplaces and systematically analyzing 31,132 using SkillScan, a multi-stage detection framework integrating static analysis with L"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2601.10338","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-01-15T12:31:52Z","cross_cats_sorted":["cs.AI","cs.CL","cs.SE"],"title_canon_sha256":"d301125dc87ce9e878f906e51ee67ab41a85a90413fbe79e2ad235c703f55d4a","abstract_canon_sha256":"72e088c7de8189bb85e81c2f2d2abb4b8e2aa3c1c52d1382374942b0634228bc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:19.832457Z","signature_b64":"ifuL1thBPQSiUwfcSNo8RVZbrt7yHWfNPluJRTEtX3xSEdOFmh498yOC0I0TmfMMj63dTsnEZf2P6kkSRxfSCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0d7a86c80ac89c2e1c7675674cefae4e8e32bd4585ec730df12ea994b8af1ff5","last_reissued_at":"2026-05-17T23:39:19.831777Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:19.831777Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Agent Skills in the Wild: An Empirical Study of Security Vulnerabilities at Scale","license":"http://creativecommons.org/licenses/by/4.0/","headline":"More than one in four AI agent skills contain at least one security vulnerability.","cross_cats":["cs.AI","cs.CL","cs.SE"],"primary_cat":"cs.CR","authors_text":"Gelei Deng, Guangquan Xu, Leo Zhang, Ruitao Feng, Weizhe Wang, Yao Zhang, Yi Liu, Yuekang Li","submitted_at":"2026-01-15T12:31:52Z","abstract_excerpt":"The rise of AI agent frameworks has introduced agent skills, modular packages containing instructions and executable code that dynamically extend agent capabilities. While this architecture enables powerful customization, skills execute with implicit trust and minimal vetting, creating a significant yet uncharacterized attack surface. We conduct the first large-scale empirical security analysis of this emerging ecosystem, collecting 42,447 skills from two major marketplaces and systematically analyzing 31,132 using SkillScan, a multi-stage detection framework integrating static analysis with L"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"26.1% of skills contain at least one vulnerability, spanning 14 distinct patterns across four categories: prompt injection, data exfiltration, privilege escalation, and supply chain risks. Data exfiltration (13.3%) and privilege escalation (11.8%) are most prevalent, while 5.2% of skills exhibit high-severity patterns strongly suggesting malicious intent.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That SkillScan's static analysis plus LLM semantic classification accurately flags real vulnerabilities at the stated precision and recall without significant selection bias in the 31,132 analyzed skills or over-representation of risky marketplaces.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"26.1% of analyzed AI agent skills contain vulnerabilities across 14 patterns, with executable scripts raising risk 2.12x, based on static and LLM analysis of 31k skills.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"More than one in four AI agent skills contain at least one security vulnerability.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"56531ea2ba115c4658f2d0f20ae58675d6f4c22c39ebab5381b72be3c5e86871"},"source":{"id":"2601.10338","kind":"arxiv","version":1},"verdict":{"id":"27557d3c-833a-45bc-8341-50e432466ee4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T23:36:14.576840Z","strongest_claim":"26.1% of skills contain at least one vulnerability, spanning 14 distinct patterns across four categories: prompt injection, data exfiltration, privilege escalation, and supply chain risks. Data exfiltration (13.3%) and privilege escalation (11.8%) are most prevalent, while 5.2% of skills exhibit high-severity patterns strongly suggesting malicious intent.","one_line_summary":"26.1% of analyzed AI agent skills contain vulnerabilities across 14 patterns, with executable scripts raising risk 2.12x, based on static and LLM analysis of 31k skills.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That SkillScan's static analysis plus LLM semantic classification accurately flags real vulnerabilities at the stated precision and recall without significant selection bias in the 31,132 analyzed skills or over-representation of risky marketplaces.","pith_extraction_headline":"More than one in four AI agent skills contain at least one security vulnerability."},"references":{"count":42,"sample":[{"doi":"","year":2025,"title":"Anonymous. 2025. SkillScan: Dataset, Detection Tools, and Collection Pipeline for Agent Skills Security Research. https://anonymous.4open.science/r/skillscan/. Anonymous repository containing annotate","work_id":"6325409d-aa49-4f84-aaa1-27b7d63fce75","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Anthropic. 2024. Model Context Protocol Specification. https:// modelcontextprotocol.io/. Open protocol for AI-tool integration","work_id":"75ddad96-0212-4e49-83f1-2e4e002253c2","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Anthropic. 2025. Agent Skills Open Standard Specification. https://agentskills.io. Open standard for portable agent skills, released October 2025","work_id":"cc40568c-9bd4-4e43-80c4-9eaec9d979f8","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Anthropic. 2025. Claude Code Documentation. https://docs.anthropic.com/en/ docs/claude-code. Official Claude Code documentation. Conference’17, July 2017, Washington, DC, USA Yi Liu, Weizhe Wang, Ruit","work_id":"11ade551-a08d-4bdd-98f9-714f770c0d4f","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Anthropic. 2025. Claude Code Skills Documentation. https://docs.anthropic.com/ en/docs/claude-code/skills. Official documentation for agent skills architecture","work_id":"ed09eb83-54f9-4163-88d5-486914f78550","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":42,"snapshot_sha256":"7b54477cf119c564a3e46dfdaeb56a44ec8372b8309382bb0bdcbd88384389a4","internal_anchors":1},"formal_canon":{"evidence_count":1,"snapshot_sha256":"efa69e7c026d72dff9115957b2339827b079f7b06e33036fa771cd03cbe55adc"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2601.10338","created_at":"2026-05-17T23:39:19.831880+00:00"},{"alias_kind":"arxiv_version","alias_value":"2601.10338v1","created_at":"2026-05-17T23:39:19.831880+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.10338","created_at":"2026-05-17T23:39:19.831880+00:00"},{"alias_kind":"pith_short_12","alias_value":"BV5INSAKZCOC","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"BV5INSAKZCOC4HDW","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"BV5INSAK","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":23,"internal_anchor_count":23,"sample":[{"citing_arxiv_id":"2602.14211","citing_title":"SkillJect: Effectively Automating Skill-Based Prompt Injection for Skill-Enabled Agents","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09038","citing_title":"SearchSkill: Teaching LLMs to Use Search Tools with Evolving Skill Banks","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14460","citing_title":"Exploiting LLM Agent Supply Chains via Payload-less Skills","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02900","citing_title":"Safety in Embodied AI: A Survey of Risks, Attacks, and Defenses","ref_index":237,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12875","citing_title":"Do Skill Descriptions Tell the Truth? Detecting Undisclosed Security Behaviors in Code-Backed LLM Skills","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02837","citing_title":"Towards Secure Agent Skills: Architecture, Threat Taxonomy, and Security Analysis","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03070","citing_title":"Credential Leakage in LLM Agent Skills: A Large-Scale Empirical Study","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03081","citing_title":"Supply-Chain Poisoning Attacks Against LLM Coding Agent Skill Ecosystems","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2602.12430","citing_title":"Agent Skills for Large Language Models: Architecture, Acquisition, Security, and the Path Forward","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12015","citing_title":"SkillSafetyBench: Evaluating Agent Safety under Skill-Facing Attack Surfaces","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11418","citing_title":"Under the Hood of SKILL.md: Semantic Supply-chain Attacks on AI Agent Skill Registry","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27358","citing_title":"Safe Bilevel Delegation (SBD): A Formal Framework for Runtime Delegation Safety in Multi-Agent Systems","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09038","citing_title":"SearchSkill: Teaching LLMs to Use Search Tools with Evolving Skill Banks","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24657","citing_title":"AgentWard: A Lifecycle Security Architecture for Autonomous AI Agents","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05868","citing_title":"SkillScope: Toward Fine-Grained Least-Privilege Enforcement for Agent Skills","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2604.22888","citing_title":"RouteGuard: Internal-Signal Detection of Skill Poisoning in LLM Agents","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05274","citing_title":"Sealing the Audit-Runtime Gap for LLM Skills","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08224","citing_title":"Externalization in LLM Agents: A Unified Review of Memory, Skills, Protocols and Harness Engineering","ref_index":92,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06550","citing_title":"SkillSieve: A Hierarchical Triage Framework for Detecting Malicious AI Agent Skills","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04759","citing_title":"Your Agent, Their Asset: A Real-World Safety Analysis of OpenClaw","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08407","citing_title":"Your Agent Is Mine: Measuring Malicious Intermediary Attacks on the LLM Supply Chain","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2604.15709","citing_title":"Bilevel Optimization of Agent Skills via Monte Carlo Tree Search","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16753","citing_title":"Know When to Trust the Skill: Delayed Appraisal and Epistemic Vigilance for Single-Agent LLMs","ref_index":6,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":1,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2","json":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2.json","graph_json":"https://pith.science/api/pith-number/BV5INSAKZCOC4HDWOVTUZ35OJ2/graph.json","events_json":"https://pith.science/api/pith-number/BV5INSAKZCOC4HDWOVTUZ35OJ2/events.json","paper":"https://pith.science/paper/BV5INSAK"},"agent_actions":{"view_html":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2","download_json":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2.json","view_paper":"https://pith.science/paper/BV5INSAK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2601.10338&json=true","fetch_graph":"https://pith.science/api/pith-number/BV5INSAKZCOC4HDWOVTUZ35OJ2/graph.json","fetch_events":"https://pith.science/api/pith-number/BV5INSAKZCOC4HDWOVTUZ35OJ2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2/action/storage_attestation","attest_author":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2/action/author_attestation","sign_citation":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2/action/citation_signature","submit_replication":"https://pith.science/pith/BV5INSAKZCOC4HDWOVTUZ35OJ2/action/replication_record"}},"created_at":"2026-05-17T23:39:19.831880+00:00","updated_at":"2026-05-17T23:39:19.831880+00:00"}