{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PNWKDRK6TJADZZFDVLT4TTKRTK","short_pith_number":"pith:PNWKDRK6","schema_version":"1.0","canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","source":{"kind":"arxiv","id":"2604.02268","version":2},"attestation_state":"computed","paper":{"title":"SKILL0: In-Context Agentic Reinforcement Learning for Skill Internalization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chengcheng Han, Jinyang Wu, Jun Xiao, Qi Gu, Weiming Lu, Xunliang Cai, Yongliang Shen, Yueting Zhuang, Zhengxi Lu, Zhiyuan Yao","submitted_at":"2026-04-02T17:03:05Z","abstract_excerpt":"Agent skills, structured packages of procedural knowledge and executable resources that agents dynamically load at inference time, have become a reliable mechanism for augmenting LLM agents. Yet inference-time skill augmentation is fundamentally limited: retrieval noise introduces irrelevant guidance, injected skill content imposes substantial token overhead, and the model never truly acquires the knowledge it merely follows. We ask whether skills can instead be internalized into model parameters, enabling zero-shot autonomous behavior without any runtime skill retrieval. We introduce SKILL0, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2604.02268","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-02T17:03:05Z","cross_cats_sorted":[],"title_canon_sha256":"7241591e3f1c7b6a521e82ecccc8cae7ff21579425c46f73fc040b05d46907ae","abstract_canon_sha256":"04a57bf11d3aeebb0b3b70c216caa9dc750cd88ef1914062cde1ca5672ded972"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:37.544252Z","signature_b64":"hLVRl72e5TzavpG1Q+RI/2TjC1JiDoVT7ucfx/F4kOeem5VnB4BK6Du2Olqdp7qq5VnmP1atVXEe06fMouE5DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","last_reissued_at":"2026-05-20T00:00:37.543511Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:37.543511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SKILL0: In-Context Agentic Reinforcement Learning for Skill Internalization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chengcheng Han, Jinyang Wu, Jun Xiao, Qi Gu, Weiming Lu, Xunliang Cai, Yongliang Shen, Yueting Zhuang, Zhengxi Lu, Zhiyuan Yao","submitted_at":"2026-04-02T17:03:05Z","abstract_excerpt":"Agent skills, structured packages of procedural knowledge and executable resources that agents dynamically load at inference time, have become a reliable mechanism for augmenting LLM agents. Yet inference-time skill augmentation is fundamentally limited: retrieval noise introduces irrelevant guidance, injected skill content imposes substantial token overhead, and the model never truly acquires the knowledge it merely follows. We ask whether skills can instead be internalized into model parameters, enabling zero-shot autonomous behavior without any runtime skill retrieval. We introduce SKILL0, "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"SKILL0 achieves substantial improvements over the standard RL baseline (+9.7% for ALFWorld, +6.6% for Search-QA, and +10.1% for WebShop), while maintaining a highly efficient context of fewer than 0.5k tokens per step.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The Dynamic Curriculum can accurately identify on-policy helpfulness of individual skill files and that progressive context withdrawal produces genuine internalization rather than superficial adaptation to the training distribution.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SKILL0 uses in-context RL with a dynamic curriculum to internalize skills into LLM parameters, yielding performance gains on agent benchmarks with under 0.5k tokens per step.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f355a7914280e81b188e6bc22206a8d808ae819da57e5219dead2af4a6a31f92"},"source":{"id":"2604.02268","kind":"arxiv","version":2},"verdict":{"id":"a798ed5a-0bdc-40eb-a7fa-3d9a785bb0f3","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T17:37:51.500477Z","strongest_claim":"SKILL0 achieves substantial improvements over the standard RL baseline (+9.7% for ALFWorld, +6.6% for Search-QA, and +10.1% for WebShop), while maintaining a highly efficient context of fewer than 0.5k tokens per step.","one_line_summary":"SKILL0 uses in-context RL with a dynamic curriculum to internalize skills into LLM parameters, yielding performance gains on agent benchmarks with under 0.5k tokens per step.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The Dynamic Curriculum can accurately identify on-policy helpfulness of individual skill files and that progressive context withdrawal produces genuine internalization rather than superficial adaptation to the training distribution.","pith_extraction_headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.02268/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":3,"sample":[{"doi":"","year":null,"title":"If any required knowledge is missing or uncertain, youMUSTcall a search engine to get more external information using format:<search> your query </search>","work_id":"d7bcf26a-201e-4a4f-bda2-e6eab4a7cb76","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Additionally, select an image compression factor larger than 1.0 for the next image","work_id":"df021423-8596-4e1e-9b62-2d9640ecddf5","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"2.<search>...</search>or<answer>...</answer> 3.<compression>...</compression> Figure 12: Prompt template used by SKILL0 for the Search-based QA task environment","work_id":"a6d42dc4-2bb3-4c6b-be6c-0c355c127d43","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":3,"snapshot_sha256":"59b11e35ce47a400b789dc97c9e3aa69c7e50c45a7f41e54fbb7fb3cbdf99352","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f79584e5efcbf4eb7f5446ef0310b9dd1a823c5fd307cfb239ad1ca5c7e2d35f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.02268","created_at":"2026-05-20T00:00:37.543642+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.02268v2","created_at":"2026-05-20T00:00:37.543642+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02268","created_at":"2026-05-20T00:00:37.543642+00:00"},{"alias_kind":"pith_short_12","alias_value":"PNWKDRK6TJAD","created_at":"2026-05-20T00:00:37.543642+00:00"},{"alias_kind":"pith_short_16","alias_value":"PNWKDRK6TJADZZFD","created_at":"2026-05-20T00:00:37.543642+00:00"},{"alias_kind":"pith_short_8","alias_value":"PNWKDRK6","created_at":"2026-05-20T00:00:37.543642+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":15,"internal_anchor_count":15,"sample":[{"citing_arxiv_id":"2605.20876","citing_title":"Terminal-World: Scaling Terminal-Agent Environments via Agent Skills","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07358","citing_title":"A Comprehensive Survey on Agent Skills: Taxonomy, Techniques, and Applications","ref_index":121,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10923","citing_title":"Dynamic Skill Lifecycle Management for Agentic Reinforcement Learning","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18401","citing_title":"SkillsVote: Lifecycle Governance of Agent Skills from Collection, Recommendation to Evolution","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14477","citing_title":"Test-Time Learning with an Evolving Library","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06130","citing_title":"Skill1: Unified Evolution of Skill-Augmented Agents via Reinforcement Learning","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27660","citing_title":"From Context to Skills: Can Language Models Learn from Context Skillfully?","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08887","citing_title":"Ace-Skill: Bootstrapping Multimodal Agents with Prioritized and Clustered Evolution","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09192","citing_title":"Evidence Over Plans: Online Trajectory Verification for Skill Distillation","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10923","citing_title":"Dynamic Skill Lifecycle Management for Agentic Reinforcement Learning","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06130","citing_title":"Skill1: Unified Evolution of Skill-Augmented Agents via Reinforcement Learning","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07358","citing_title":"A Comprehensive Survey on Agent Skills: Taxonomy, Techniques, and Applications","ref_index":119,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06130","citing_title":"Skill1: Unified Evolution of Skill-Augmented Agents via Reinforcement Learning","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08013","citing_title":"Learning CLI Agents with Structured Action Credit under Selective Observation","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05851","citing_title":"Hypothesis generation and updating in large language models","ref_index":69,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK","json":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK.json","graph_json":"https://pith.science/api/pith-number/PNWKDRK6TJADZZFDVLT4TTKRTK/graph.json","events_json":"https://pith.science/api/pith-number/PNWKDRK6TJADZZFDVLT4TTKRTK/events.json","paper":"https://pith.science/paper/PNWKDRK6"},"agent_actions":{"view_html":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK","download_json":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK.json","view_paper":"https://pith.science/paper/PNWKDRK6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.02268&json=true","fetch_graph":"https://pith.science/api/pith-number/PNWKDRK6TJADZZFDVLT4TTKRTK/graph.json","fetch_events":"https://pith.science/api/pith-number/PNWKDRK6TJADZZFDVLT4TTKRTK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/action/storage_attestation","attest_author":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/action/author_attestation","sign_citation":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/action/citation_signature","submit_replication":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/action/replication_record"}},"created_at":"2026-05-20T00:00:37.543642+00:00","updated_at":"2026-05-20T00:00:37.543642+00:00"}