{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PNWKDRK6TJADZZFDVLT4TTKRTK","short_pith_number":"pith:PNWKDRK6","canonical_record":{"source":{"id":"2604.02268","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-02T17:03:05Z","cross_cats_sorted":[],"title_canon_sha256":"7241591e3f1c7b6a521e82ecccc8cae7ff21579425c46f73fc040b05d46907ae","abstract_canon_sha256":"04a57bf11d3aeebb0b3b70c216caa9dc750cd88ef1914062cde1ca5672ded972"},"schema_version":"1.0"},"canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","source":{"kind":"arxiv","id":"2604.02268","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.02268","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"arxiv_version","alias_value":"2604.02268v2","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02268","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"pith_short_12","alias_value":"PNWKDRK6TJAD","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"pith_short_16","alias_value":"PNWKDRK6TJADZZFD","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"pith_short_8","alias_value":"PNWKDRK6","created_at":"2026-05-20T00:00:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PNWKDRK6TJADZZFDVLT4TTKRTK","target":"record","payload":{"canonical_record":{"source":{"id":"2604.02268","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-02T17:03:05Z","cross_cats_sorted":[],"title_canon_sha256":"7241591e3f1c7b6a521e82ecccc8cae7ff21579425c46f73fc040b05d46907ae","abstract_canon_sha256":"04a57bf11d3aeebb0b3b70c216caa9dc750cd88ef1914062cde1ca5672ded972"},"schema_version":"1.0"},"canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:37.544252Z","signature_b64":"hLVRl72e5TzavpG1Q+RI/2TjC1JiDoVT7ucfx/F4kOeem5VnB4BK6Du2Olqdp7qq5VnmP1atVXEe06fMouE5DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","last_reissued_at":"2026-05-20T00:00:37.543511Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:37.543511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.02268","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6UexJemr0WShGwOcHR/lCHS8PIOJfeaJkttOc1LV3zPpPbHZEop4sgrSJdwCn3eOxhDWqeuGcz6c4vBoRku/Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T11:17:56.345727Z"},"content_sha256":"d253cafeaf5a9fe5e06bc3305b2bb7e8266bbc2b441a276da388a1ecca4d18f0","schema_version":"1.0","event_id":"sha256:d253cafeaf5a9fe5e06bc3305b2bb7e8266bbc2b441a276da388a1ecca4d18f0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PNWKDRK6TJADZZFDVLT4TTKRTK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SKILL0: In-Context Agentic Reinforcement Learning for Skill Internalization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chengcheng Han, Jinyang Wu, Jun Xiao, Qi Gu, Weiming Lu, Xunliang Cai, Yongliang Shen, Yueting Zhuang, Zhengxi Lu, Zhiyuan Yao","submitted_at":"2026-04-02T17:03:05Z","abstract_excerpt":"Agent skills, structured packages of procedural knowledge and executable resources that agents dynamically load at inference time, have become a reliable mechanism for augmenting LLM agents. Yet inference-time skill augmentation is fundamentally limited: retrieval noise introduces irrelevant guidance, injected skill content imposes substantial token overhead, and the model never truly acquires the knowledge it merely follows. We ask whether skills can instead be internalized into model parameters, enabling zero-shot autonomous behavior without any runtime skill retrieval. We introduce SKILL0, "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"SKILL0 achieves substantial improvements over the standard RL baseline (+9.7% for ALFWorld, +6.6% for Search-QA, and +10.1% for WebShop), while maintaining a highly efficient context of fewer than 0.5k tokens per step.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The Dynamic Curriculum can accurately identify on-policy helpfulness of individual skill files and that progressive context withdrawal produces genuine internalization rather than superficial adaptation to the training distribution.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SKILL0 uses in-context RL with a dynamic curriculum to internalize skills into LLM parameters, yielding performance gains on agent benchmarks with under 0.5k tokens per step.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f355a7914280e81b188e6bc22206a8d808ae819da57e5219dead2af4a6a31f92"},"source":{"id":"2604.02268","kind":"arxiv","version":2},"verdict":{"id":"a798ed5a-0bdc-40eb-a7fa-3d9a785bb0f3","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T17:37:51.500477Z","strongest_claim":"SKILL0 achieves substantial improvements over the standard RL baseline (+9.7% for ALFWorld, +6.6% for Search-QA, and +10.1% for WebShop), while maintaining a highly efficient context of fewer than 0.5k tokens per step.","one_line_summary":"SKILL0 uses in-context RL with a dynamic curriculum to internalize skills into LLM parameters, yielding performance gains on agent benchmarks with under 0.5k tokens per step.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The Dynamic Curriculum can accurately identify on-policy helpfulness of individual skill files and that progressive context withdrawal produces genuine internalization rather than superficial adaptation to the training distribution.","pith_extraction_headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.02268/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":3,"sample":[{"doi":"","year":null,"title":"If any required knowledge is missing or uncertain, youMUSTcall a search engine to get more external information using format:<search> your query </search>","work_id":"d7bcf26a-201e-4a4f-bda2-e6eab4a7cb76","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Additionally, select an image compression factor larger than 1.0 for the next image","work_id":"df021423-8596-4e1e-9b62-2d9640ecddf5","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"2.<search>...</search>or<answer>...</answer> 3.<compression>...</compression> Figure 12: Prompt template used by SKILL0 for the Search-based QA task environment","work_id":"a6d42dc4-2bb3-4c6b-be6c-0c355c127d43","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":3,"snapshot_sha256":"59b11e35ce47a400b789dc97c9e3aa69c7e50c45a7f41e54fbb7fb3cbdf99352","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f79584e5efcbf4eb7f5446ef0310b9dd1a823c5fd307cfb239ad1ca5c7e2d35f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"a798ed5a-0bdc-40eb-a7fa-3d9a785bb0f3"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6P1CeNOUg72rycIJIgxitZJiNPzzcMv4mgtQEoEPFsYWkKPLBmKH9a8mT6Zsk9VD+fARdEGyViAd4zDmwnYgAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T11:17:56.346250Z"},"content_sha256":"0c0c60707d7b81f35693cecf0884d52308172690f23a53831153a98587935429","schema_version":"1.0","event_id":"sha256:0c0c60707d7b81f35693cecf0884d52308172690f23a53831153a98587935429"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/bundle.json","state_url":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T11:17:56Z","links":{"resolver":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK","bundle":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/bundle.json","state":"https://pith.science/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PNWKDRK6TJADZZFDVLT4TTKRTK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PNWKDRK6TJADZZFDVLT4TTKRTK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"04a57bf11d3aeebb0b3b70c216caa9dc750cd88ef1914062cde1ca5672ded972","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-02T17:03:05Z","title_canon_sha256":"7241591e3f1c7b6a521e82ecccc8cae7ff21579425c46f73fc040b05d46907ae"},"schema_version":"1.0","source":{"id":"2604.02268","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.02268","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"arxiv_version","alias_value":"2604.02268v2","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02268","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"pith_short_12","alias_value":"PNWKDRK6TJAD","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"pith_short_16","alias_value":"PNWKDRK6TJADZZFD","created_at":"2026-05-20T00:00:37Z"},{"alias_kind":"pith_short_8","alias_value":"PNWKDRK6","created_at":"2026-05-20T00:00:37Z"}],"graph_snapshots":[{"event_id":"sha256:0c0c60707d7b81f35693cecf0884d52308172690f23a53831153a98587935429","target":"graph","created_at":"2026-05-20T00:00:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"SKILL0 achieves substantial improvements over the standard RL baseline (+9.7% for ALFWorld, +6.6% for Search-QA, and +10.1% for WebShop), while maintaining a highly efficient context of fewer than 0.5k tokens per step."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The Dynamic Curriculum can accurately identify on-policy helpfulness of individual skill files and that progressive context withdrawal produces genuine internalization rather than superficial adaptation to the training distribution."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SKILL0 uses in-context RL with a dynamic curriculum to internalize skills into LLM parameters, yielding performance gains on agent benchmarks with under 0.5k tokens per step."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion."}],"snapshot_sha256":"f355a7914280e81b188e6bc22206a8d808ae819da57e5219dead2af4a6a31f92"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f79584e5efcbf4eb7f5446ef0310b9dd1a823c5fd307cfb239ad1ca5c7e2d35f"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.02268/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Agent skills, structured packages of procedural knowledge and executable resources that agents dynamically load at inference time, have become a reliable mechanism for augmenting LLM agents. Yet inference-time skill augmentation is fundamentally limited: retrieval noise introduces irrelevant guidance, injected skill content imposes substantial token overhead, and the model never truly acquires the knowledge it merely follows. We ask whether skills can instead be internalized into model parameters, enabling zero-shot autonomous behavior without any runtime skill retrieval. We introduce SKILL0, ","authors_text":"Chengcheng Han, Jinyang Wu, Jun Xiao, Qi Gu, Weiming Lu, Xunliang Cai, Yongliang Shen, Yueting Zhuang, Zhengxi Lu, Zhiyuan Yao","cross_cats":[],"headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-02T17:03:05Z","title":"SKILL0: In-Context Agentic Reinforcement Learning for Skill Internalization"},"references":{"count":3,"internal_anchors":0,"resolved_work":3,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"If any required knowledge is missing or uncertain, youMUSTcall a search engine to get more external information using format:<search> your query </search>","work_id":"d7bcf26a-201e-4a4f-bda2-e6eab4a7cb76","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Additionally, select an image compression factor larger than 1.0 for the next image","work_id":"df021423-8596-4e1e-9b62-2d9640ecddf5","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"2.<search>...</search>or<answer>...</answer> 3.<compression>...</compression> Figure 12: Prompt template used by SKILL0 for the Search-based QA task environment","work_id":"a6d42dc4-2bb3-4c6b-be6c-0c355c127d43","year":null}],"snapshot_sha256":"59b11e35ce47a400b789dc97c9e3aa69c7e50c45a7f41e54fbb7fb3cbdf99352"},"source":{"id":"2604.02268","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-19T17:37:51.500477Z","id":"a798ed5a-0bdc-40eb-a7fa-3d9a785bb0f3","model_set":{"reader":"grok-4.3"},"one_line_summary":"SKILL0 uses in-context RL with a dynamic curriculum to internalize skills into LLM parameters, yielding performance gains on agent benchmarks with under 0.5k tokens per step.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A curriculum of progressively withdrawing skill context during reinforcement learning lets agents internalize procedural knowledge into their parameters for zero-shot task completion.","strongest_claim":"SKILL0 achieves substantial improvements over the standard RL baseline (+9.7% for ALFWorld, +6.6% for Search-QA, and +10.1% for WebShop), while maintaining a highly efficient context of fewer than 0.5k tokens per step.","weakest_assumption":"The Dynamic Curriculum can accurately identify on-policy helpfulness of individual skill files and that progressive context withdrawal produces genuine internalization rather than superficial adaptation to the training distribution."}},"verdict_id":"a798ed5a-0bdc-40eb-a7fa-3d9a785bb0f3"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d253cafeaf5a9fe5e06bc3305b2bb7e8266bbc2b441a276da388a1ecca4d18f0","target":"record","created_at":"2026-05-20T00:00:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"04a57bf11d3aeebb0b3b70c216caa9dc750cd88ef1914062cde1ca5672ded972","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-02T17:03:05Z","title_canon_sha256":"7241591e3f1c7b6a521e82ecccc8cae7ff21579425c46f73fc040b05d46907ae"},"schema_version":"1.0","source":{"id":"2604.02268","kind":"arxiv","version":2}},"canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7b6ca1c55e9a403ce4a3aae7c9cd519a9939cc4b3128b91dbb3a1d31dacb9494","first_computed_at":"2026-05-20T00:00:37.543511Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:37.543511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hLVRl72e5TzavpG1Q+RI/2TjC1JiDoVT7ucfx/F4kOeem5VnB4BK6Du2Olqdp7qq5VnmP1atVXEe06fMouE5DQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:37.544252Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.02268","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d253cafeaf5a9fe5e06bc3305b2bb7e8266bbc2b441a276da388a1ecca4d18f0","sha256:0c0c60707d7b81f35693cecf0884d52308172690f23a53831153a98587935429"],"state_sha256":"68665ab7927b05a6b9718a7b1b49df7946ad9065e64934b2d7462c32f2209919"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aZyhvHCWm1/qtfm6HDJDmevJZ6i/FO110JWoM8YAj/FteBQXRAczew3yJdfLjEWtATocYV9ElJI3nLqmHI+VCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T11:17:56.348707Z","bundle_sha256":"c34175006472bda0d3e9ef67501dfb5d3f1da2035c579338db308584a14bd9c0"}}