{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BPNRV46AGPCWTLEUKWGGABYUGM","short_pith_number":"pith:BPNRV46A","schema_version":"1.0","canonical_sha256":"0bdb1af3c033c569ac94558c600714332b84b62a906f1200529920e5b5cc09a7","source":{"kind":"arxiv","id":"2605.12875","version":1},"attestation_state":"computed","paper":{"title":"Do Skill Descriptions Tell the Truth? Detecting Undisclosed Security Behaviors in Code-Backed LLM Skills","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"LLM skill descriptions often omit security-relevant operations performed by their code implementations, which SKILLSCOPE detects via source-level graphs.","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Bang Fu, Baoning Niu, Huan Xing, Wenhui He, Xing Fan, Yue Li, Zehua Zhang","submitted_at":"2026-05-13T01:44:10Z","abstract_excerpt":"Programmatic skills in LLM ecosystems consist of a natural-language description and executable implementation files. Users and LLMs rely on the description to understand the skill's scope. However, the implementation may perform security-relevant operations, such as credential access, network communication, or command execution, that the description does not state. We study this description--implementation inconsistency by asking whether the implementation stays within the security-relevant scope declared in the description. We manually analyze 920 real-world programmatic skills and construct "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.12875","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-13T01:44:10Z","cross_cats_sorted":[],"title_canon_sha256":"18bf03000c89e0ae0d96318e5b3d246331023ca28717cf77554c1fd848db642c","abstract_canon_sha256":"de19fec0b81d82ee05fe2e40f067b9d10a6dea595a899383d8f98fa002f81c75"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:11.228266Z","signature_b64":"2k4v7zFqwOoTpSnJUmpSbsqieWBkg5jmI7G7/9lNe9/cCvM45wnlfZJ85Gq/kpdCabAI/LM0NQJmctSlnO5SCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0bdb1af3c033c569ac94558c600714332b84b62a906f1200529920e5b5cc09a7","last_reissued_at":"2026-05-18T03:09:11.227530Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:11.227530Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Do Skill Descriptions Tell the Truth? Detecting Undisclosed Security Behaviors in Code-Backed LLM Skills","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"LLM skill descriptions often omit security-relevant operations performed by their code implementations, which SKILLSCOPE detects via source-level graphs.","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Bang Fu, Baoning Niu, Huan Xing, Wenhui He, Xing Fan, Yue Li, Zehua Zhang","submitted_at":"2026-05-13T01:44:10Z","abstract_excerpt":"Programmatic skills in LLM ecosystems consist of a natural-language description and executable implementation files. Users and LLMs rely on the description to understand the skill's scope. However, the implementation may perform security-relevant operations, such as credential access, network communication, or command execution, that the description does not state. We study this description--implementation inconsistency by asking whether the implementation stays within the security-relevant scope declared in the description. We manually analyze 920 real-world programmatic skills and construct "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"On 4,556 programmatic skills with double-blind human review, SKILLSCOPE achieves a precision of 84.8% and a recall of 96.5% for identifying inconsistency. Confirmed inconsistency affects 9.4% of skills.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The 11-category taxonomy constructed from 920 manually analyzed skills is assumed to comprehensively cover all security-relevant operations that could appear in implementations, with no major categories missed or over-generalized.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SKILLSCOPE detects undisclosed security behaviors in LLM skill implementations via security property graphs and taxonomy-based consistency checking, identifying confirmed inconsistencies in 9.4% of 4,556 evaluated skills with 84.8% precision and 96.5% recall against human review.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LLM skill descriptions often omit security-relevant operations performed by their code implementations, which SKILLSCOPE detects via source-level graphs.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4403115f4d7a2874b2b2e5a8a6419430041e64b87e808045062629c9f7a85164"},"source":{"id":"2605.12875","kind":"arxiv","version":1},"verdict":{"id":"83e3ddb8-aa33-4eec-8aa8-b4fb71648b86","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T19:00:43.128361Z","strongest_claim":"On 4,556 programmatic skills with double-blind human review, SKILLSCOPE achieves a precision of 84.8% and a recall of 96.5% for identifying inconsistency. Confirmed inconsistency affects 9.4% of skills.","one_line_summary":"SKILLSCOPE detects undisclosed security behaviors in LLM skill implementations via security property graphs and taxonomy-based consistency checking, identifying confirmed inconsistencies in 9.4% of 4,556 evaluated skills with 84.8% precision and 96.5% recall against human review.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The 11-category taxonomy constructed from 920 manually analyzed skills is assumed to comprehensively cover all security-relevant operations that could appear in implementations, with no major categories missed or over-generalized.","pith_extraction_headline":"LLM skill descriptions often omit security-relevant operations performed by their code implementations, which SKILLSCOPE detects via source-level graphs."},"references":{"count":31,"sample":[{"doi":"","year":2026,"title":"Extend claude with skills,","work_id":"3dd2c273-4727-49cc-bd5c-45e4045132fe","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"GitHub, “About agent skills,” https://docs.github.com/en/copilot/concepts/ agents/about-agent-skills, 2026, gitHub Docs. Accessed: 2026-04-14","work_id":"c197115b-5fff-48d6-b976-7758632091f9","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"OpenAI, “Skills in chatgpt,” 2026, official documentation. [Online]. Available: https://help.openai.com/en/articles/20001066-skills-in-chatgpt","work_id":"c1fda2ed-19d8-4f0d-82d4-dcccd2550284","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"Creating agent skills for github copilot,","work_id":"eb8b70ad-7f8b-4dd5-b2dd-b4f3daf0da1a","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"Anthropic, “Claude code overview,” https://docs.anthropic.com/en/ docs/agents-and-tools/claude-code/overview, 2026, claude Code Docs. Accessed: 2026-04-14","work_id":"f6d498da-18b8-46b7-9026-044008add1ff","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":31,"snapshot_sha256":"cd8a533037401e5105ae9fecef6e99769d2f704c91dc70b665f7c3411b1101e4","internal_anchors":3},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.12875","created_at":"2026-05-18T03:09:11.227642+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.12875v1","created_at":"2026-05-18T03:09:11.227642+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12875","created_at":"2026-05-18T03:09:11.227642+00:00"},{"alias_kind":"pith_short_12","alias_value":"BPNRV46AGPCW","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"BPNRV46AGPCWTLEU","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"BPNRV46A","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM","json":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM.json","graph_json":"https://pith.science/api/pith-number/BPNRV46AGPCWTLEUKWGGABYUGM/graph.json","events_json":"https://pith.science/api/pith-number/BPNRV46AGPCWTLEUKWGGABYUGM/events.json","paper":"https://pith.science/paper/BPNRV46A"},"agent_actions":{"view_html":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM","download_json":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM.json","view_paper":"https://pith.science/paper/BPNRV46A","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.12875&json=true","fetch_graph":"https://pith.science/api/pith-number/BPNRV46AGPCWTLEUKWGGABYUGM/graph.json","fetch_events":"https://pith.science/api/pith-number/BPNRV46AGPCWTLEUKWGGABYUGM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM/action/storage_attestation","attest_author":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM/action/author_attestation","sign_citation":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM/action/citation_signature","submit_replication":"https://pith.science/pith/BPNRV46AGPCWTLEUKWGGABYUGM/action/replication_record"}},"created_at":"2026-05-18T03:09:11.227642+00:00","updated_at":"2026-05-18T03:09:11.227642+00:00"}