{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:FW7ADB3RSNC6OPWWYJAQIEKTJF","short_pith_number":"pith:FW7ADB3R","canonical_record":{"source":{"id":"2604.20572","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-22T13:50:55Z","cross_cats_sorted":[],"title_canon_sha256":"e23b7b0bb9a0366ba35570c56192f32eb233bd58a956db37f7065034a8374401","abstract_canon_sha256":"24b3dba2d40e23bad40fbcadfa9276a427f16610a62637820fd0338a20c6b03c"},"schema_version":"1.0"},"canonical_sha256":"2dbe0187719345e73ed6c241041153494a4002ef49e91e88b192ec24a8d0e021","source":{"kind":"arxiv","id":"2604.20572","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.20572","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.20572v2","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.20572","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_12","alias_value":"FW7ADB3RSNC6","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_16","alias_value":"FW7ADB3RSNC6OPWW","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_8","alias_value":"FW7ADB3R","created_at":"2026-06-05T01:14:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:FW7ADB3RSNC6OPWWYJAQIEKTJF","target":"record","payload":{"canonical_record":{"source":{"id":"2604.20572","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-22T13:50:55Z","cross_cats_sorted":[],"title_canon_sha256":"e23b7b0bb9a0366ba35570c56192f32eb233bd58a956db37f7065034a8374401","abstract_canon_sha256":"24b3dba2d40e23bad40fbcadfa9276a427f16610a62637820fd0338a20c6b03c"},"schema_version":"1.0"},"canonical_sha256":"2dbe0187719345e73ed6c241041153494a4002ef49e91e88b192ec24a8d0e021","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:14:39.278146Z","signature_b64":"JiWhFGl6HLU19rVt6uYVlx/yW+xBuvwJurxlfwlMjIwxx5cT5VWyC/fWaDiPfoqBZVUBHZebB6V+62gG9hVeDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2dbe0187719345e73ed6c241041153494a4002ef49e91e88b192ec24a8d0e021","last_reissued_at":"2026-06-05T01:14:39.277588Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:14:39.277588Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.20572","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:14:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"O+EAOUmBl9YKYbCTXwbjQyzIVrwVZLXlYTBzXhFirSjBVI9z5h2SsxkPJpSUlInakOyDFlz73eSb9zMFBx1LBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T21:23:59.557176Z"},"content_sha256":"825c1a1c856c7060b9c5bfa24b03176a5934630b56782e88bcbeb8f4aff7a1f6","schema_version":"1.0","event_id":"sha256:825c1a1c856c7060b9c5bfa24b03176a5934630b56782e88bcbeb8f4aff7a1f6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:FW7ADB3RSNC6OPWWYJAQIEKTJF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Ask Only When Needed: Proactive Retrieval from Memory and Skills for Experience-Driven Lifelong Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Lifelong agents learn an explicit policy for retrieving past experience only when it improves the next decision.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bo Zhang, Jie Zhou, Liang He, Qin Chen, Wei Li, Xin Li, Yuxuan Cai","submitted_at":"2026-04-22T13:50:55Z","abstract_excerpt":"Online lifelong learning agents must decide not only how to act but also when to consult prior experience to continually improve on long-horizon tasks. Existing methods typically retrieve memories passively, such as at task initialization or after each step, and therefore miss knowledge gaps that arise during interaction. We propose ProactAgent, an experience-driven lifelong learning framework for proactive retrieval over a structured Experience Base. ProactAgent continually improves through ExpOnEvo, which jointly updates policies and refines memory, organizing past interactions into factual,"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on SciWorld, AlfWorld, and StuLife show that ProactAgent consistently improves lifelong agent performance, achieving success rates of 73.50% on SciWorld and 71.28% on AlfWorld while substantially reducing retrieval overhead, and attains performance competitive with proprietary models on StuLife.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That comparing continuations from identical interaction prefixes with and without retrieval supplies unbiased, step-level supervision for the retrieval policy without introducing selection bias or reward hacking.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ProactAgent learns a proactive retrieval policy via reinforcement learning on paired task continuations, improving lifelong agent performance and cutting retrieval overhead on SciWorld, AlfWorld, and StuLife.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Lifelong agents learn an explicit policy for retrieving past experience only when it improves the next decision.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"8a7cb05611f2d8c9456dfe5c33acb1b2e8ff935c897006d0f459eedffcd23c82"},"source":{"id":"2604.20572","kind":"arxiv","version":2},"verdict":{"id":"11506b0a-28db-492d-8630-dae2690417dc","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T00:05:19.666653Z","strongest_claim":"Experiments on SciWorld, AlfWorld, and StuLife show that ProactAgent consistently improves lifelong agent performance, achieving success rates of 73.50% on SciWorld and 71.28% on AlfWorld while substantially reducing retrieval overhead, and attains performance competitive with proprietary models on StuLife.","one_line_summary":"ProactAgent learns a proactive retrieval policy via reinforcement learning on paired task continuations, improving lifelong agent performance and cutting retrieval overhead on SciWorld, AlfWorld, and StuLife.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That comparing continuations from identical interaction prefixes with and without retrieval supplies unbiased, step-level supervision for the retrieval policy without introducing selection bias or reward hacking.","pith_extraction_headline":"Lifelong agents learn an explicit policy for retrieving past experience only when it improves the next decision."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.20572/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-21T14:37:15.404994Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-20T01:48:29.508608Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"d238e156527a364c78cc3623615f5638b714d52b0fb1c40e00b7cb22742ced32"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"11506b0a-28db-492d-8630-dae2690417dc"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:14:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mu+Z7PHQQgfKCY2HG0tBRc7duy9Kgmymh+8QxKUtFIS5kzHOSVPiHmPK7ig9H1fN6wqtE1N9rmVuku+1O0zPAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T21:23:59.557665Z"},"content_sha256":"72712ec9aa01a76dd591f1325994d0f41ddf245933851d8bcb05b5b029f00fc7","schema_version":"1.0","event_id":"sha256:72712ec9aa01a76dd591f1325994d0f41ddf245933851d8bcb05b5b029f00fc7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF/bundle.json","state_url":"https://pith.science/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T21:23:59Z","links":{"resolver":"https://pith.science/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF","bundle":"https://pith.science/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF/bundle.json","state":"https://pith.science/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FW7ADB3RSNC6OPWWYJAQIEKTJF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:FW7ADB3RSNC6OPWWYJAQIEKTJF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"24b3dba2d40e23bad40fbcadfa9276a427f16610a62637820fd0338a20c6b03c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-22T13:50:55Z","title_canon_sha256":"e23b7b0bb9a0366ba35570c56192f32eb233bd58a956db37f7065034a8374401"},"schema_version":"1.0","source":{"id":"2604.20572","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.20572","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.20572v2","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.20572","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_12","alias_value":"FW7ADB3RSNC6","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_16","alias_value":"FW7ADB3RSNC6OPWW","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_8","alias_value":"FW7ADB3R","created_at":"2026-06-05T01:14:39Z"}],"graph_snapshots":[{"event_id":"sha256:72712ec9aa01a76dd591f1325994d0f41ddf245933851d8bcb05b5b029f00fc7","target":"graph","created_at":"2026-06-05T01:14:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments on SciWorld, AlfWorld, and StuLife show that ProactAgent consistently improves lifelong agent performance, achieving success rates of 73.50% on SciWorld and 71.28% on AlfWorld while substantially reducing retrieval overhead, and attains performance competitive with proprietary models on StuLife."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That comparing continuations from identical interaction prefixes with and without retrieval supplies unbiased, step-level supervision for the retrieval policy without introducing selection bias or reward hacking."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ProactAgent learns a proactive retrieval policy via reinforcement learning on paired task continuations, improving lifelong agent performance and cutting retrieval overhead on SciWorld, AlfWorld, and StuLife."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Lifelong agents learn an explicit policy for retrieving past experience only when it improves the next decision."}],"snapshot_sha256":"8a7cb05611f2d8c9456dfe5c33acb1b2e8ff935c897006d0f459eedffcd23c82"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-21T14:37:15.404994Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T01:48:29.508608Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.20572/integrity.json","findings":[],"snapshot_sha256":"d238e156527a364c78cc3623615f5638b714d52b0fb1c40e00b7cb22742ced32","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Online lifelong learning agents must decide not only how to act but also when to consult prior experience to continually improve on long-horizon tasks. Existing methods typically retrieve memories passively, such as at task initialization or after each step, and therefore miss knowledge gaps that arise during interaction. We propose ProactAgent, an experience-driven lifelong learning framework for proactive retrieval over a structured Experience Base. ProactAgent continually improves through ExpOnEvo, which jointly updates policies and refines memory, organizing past interactions into factual,","authors_text":"Bo Zhang, Jie Zhou, Liang He, Qin Chen, Wei Li, Xin Li, Yuxuan Cai","cross_cats":[],"headline":"Lifelong agents learn an explicit policy for retrieving past experience only when it improves the next decision.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-22T13:50:55Z","title":"Ask Only When Needed: Proactive Retrieval from Memory and Skills for Experience-Driven Lifelong Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.20572","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T00:05:19.666653Z","id":"11506b0a-28db-492d-8630-dae2690417dc","model_set":{"reader":"grok-4.3"},"one_line_summary":"ProactAgent learns a proactive retrieval policy via reinforcement learning on paired task continuations, improving lifelong agent performance and cutting retrieval overhead on SciWorld, AlfWorld, and StuLife.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Lifelong agents learn an explicit policy for retrieving past experience only when it improves the next decision.","strongest_claim":"Experiments on SciWorld, AlfWorld, and StuLife show that ProactAgent consistently improves lifelong agent performance, achieving success rates of 73.50% on SciWorld and 71.28% on AlfWorld while substantially reducing retrieval overhead, and attains performance competitive with proprietary models on StuLife.","weakest_assumption":"That comparing continuations from identical interaction prefixes with and without retrieval supplies unbiased, step-level supervision for the retrieval policy without introducing selection bias or reward hacking."}},"verdict_id":"11506b0a-28db-492d-8630-dae2690417dc"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:825c1a1c856c7060b9c5bfa24b03176a5934630b56782e88bcbeb8f4aff7a1f6","target":"record","created_at":"2026-06-05T01:14:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"24b3dba2d40e23bad40fbcadfa9276a427f16610a62637820fd0338a20c6b03c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-22T13:50:55Z","title_canon_sha256":"e23b7b0bb9a0366ba35570c56192f32eb233bd58a956db37f7065034a8374401"},"schema_version":"1.0","source":{"id":"2604.20572","kind":"arxiv","version":2}},"canonical_sha256":"2dbe0187719345e73ed6c241041153494a4002ef49e91e88b192ec24a8d0e021","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2dbe0187719345e73ed6c241041153494a4002ef49e91e88b192ec24a8d0e021","first_computed_at":"2026-06-05T01:14:39.277588Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-05T01:14:39.277588Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JiWhFGl6HLU19rVt6uYVlx/yW+xBuvwJurxlfwlMjIwxx5cT5VWyC/fWaDiPfoqBZVUBHZebB6V+62gG9hVeDw==","signature_status":"signed_v1","signed_at":"2026-06-05T01:14:39.278146Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.20572","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:825c1a1c856c7060b9c5bfa24b03176a5934630b56782e88bcbeb8f4aff7a1f6","sha256:72712ec9aa01a76dd591f1325994d0f41ddf245933851d8bcb05b5b029f00fc7"],"state_sha256":"114369aa0a6fc401a5e03a47f1b40ffb07e8e983638c0666cddada35cd9281cb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LPVr1MLHfT70OSFEkD3czaLYYaD8OosudNj9uj6ugA1ESgnCP1Ln4k65j7j9W/nAPie2cbCK5b+Sn5DsKXJeBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T21:23:59.559851Z","bundle_sha256":"5ae0a051d6436ba01ae0458cf41de72b4ed8d7525e7f9745c8ca475ef7f1e7c4"}}