{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:GESJVO7RPBMDAD5P6VMFZDWRML","short_pith_number":"pith:GESJVO7R","schema_version":"1.0","canonical_sha256":"31249abbf17858300faff5585c8ed162ecfa3f0c0fb52f52f3957709c6df9d20","source":{"kind":"arxiv","id":"2605.18693","version":1},"attestation_state":"computed","paper":{"title":"SkillGenBench: Benchmarking Skill Generation Pipelines for LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Huacan Wang, QianyuXu, Qizhen Lan, Ronghao Chen, Sen Hu, Shuo Zhang, Yifan Zhou, Zhangquan Chen, Zhentao Zhang, Zhi Yang, Ziming Cheng","submitted_at":"2026-05-18T17:28:36Z","abstract_excerpt":"As LLM agents are increasingly built around reusable skills, a central challenge is no longer only whether agents can use provided skills, but whether they can generate correct, reusable, and executable skills from repositories and documents. Existing benchmarks primarily evaluate the efficacy of given skills or the ability of agents to solve downstream tasks from raw context, but they do not isolate skill generation itself as the object of study. We introduce SkillGenBench, a benchmark for evaluating skill generation pipelines under a unified and controlled protocol. In SkillGenBench, a gener"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.18693","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-18T17:28:36Z","cross_cats_sorted":[],"title_canon_sha256":"007dac7e72680bbb7dc92abdb1c8f2aa8f9e927e544a0de75e1cc3d116283d83","abstract_canon_sha256":"957c449eee581f0ce1aeec599bc6dc9d89ae1ac90c6856fc2144f76c0e1f8bec"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:06:15.395606Z","signature_b64":"XHyJCnpTE7R57CeeRRMj+yIPkWyM/PtFK8NOLJz1Tmn5FvuzU/1+9p268As5uBCyyEcCgM872fUHnE9fnSl8Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"31249abbf17858300faff5585c8ed162ecfa3f0c0fb52f52f3957709c6df9d20","last_reissued_at":"2026-05-20T00:06:15.395017Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:06:15.395017Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SkillGenBench: Benchmarking Skill Generation Pipelines for LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Huacan Wang, QianyuXu, Qizhen Lan, Ronghao Chen, Sen Hu, Shuo Zhang, Yifan Zhou, Zhangquan Chen, Zhentao Zhang, Zhi Yang, Ziming Cheng","submitted_at":"2026-05-18T17:28:36Z","abstract_excerpt":"As LLM agents are increasingly built around reusable skills, a central challenge is no longer only whether agents can use provided skills, but whether they can generate correct, reusable, and executable skills from repositories and documents. Existing benchmarks primarily evaluate the efficacy of given skills or the ability of agents to solve downstream tasks from raw context, but they do not isolate skill generation itself as the object of study. We introduce SkillGenBench, a benchmark for evaluating skill generation pipelines under a unified and controlled protocol. In SkillGenBench, a gener"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18693","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18693/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-20T00:01:59.092560Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"71bfc84b6a0125d34441cb383d7e2f8421d02c6502ef2e7b7b4dc61f43cdb294"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.18693","created_at":"2026-05-20T00:06:15.395096+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.18693v1","created_at":"2026-05-20T00:06:15.395096+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18693","created_at":"2026-05-20T00:06:15.395096+00:00"},{"alias_kind":"pith_short_12","alias_value":"GESJVO7RPBMD","created_at":"2026-05-20T00:06:15.395096+00:00"},{"alias_kind":"pith_short_16","alias_value":"GESJVO7RPBMDAD5P","created_at":"2026-05-20T00:06:15.395096+00:00"},{"alias_kind":"pith_short_8","alias_value":"GESJVO7R","created_at":"2026-05-20T00:06:15.395096+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML","json":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML.json","graph_json":"https://pith.science/api/pith-number/GESJVO7RPBMDAD5P6VMFZDWRML/graph.json","events_json":"https://pith.science/api/pith-number/GESJVO7RPBMDAD5P6VMFZDWRML/events.json","paper":"https://pith.science/paper/GESJVO7R"},"agent_actions":{"view_html":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML","download_json":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML.json","view_paper":"https://pith.science/paper/GESJVO7R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.18693&json=true","fetch_graph":"https://pith.science/api/pith-number/GESJVO7RPBMDAD5P6VMFZDWRML/graph.json","fetch_events":"https://pith.science/api/pith-number/GESJVO7RPBMDAD5P6VMFZDWRML/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML/action/storage_attestation","attest_author":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML/action/author_attestation","sign_citation":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML/action/citation_signature","submit_replication":"https://pith.science/pith/GESJVO7RPBMDAD5P6VMFZDWRML/action/replication_record"}},"created_at":"2026-05-20T00:06:15.395096+00:00","updated_at":"2026-05-20T00:06:15.395096+00:00"}