{"work":{"id":"0ab11b49-e934-49e1-9eaf-a25fb7343010","openalex_id":null,"doi":null,"arxiv_id":"2601.03192","raw_key":null,"title":"MemRL: Self-Evolving Agents via Runtime Reinforcement Learning on Episodic Memory","authors":null,"authors_text":"Shengtao Zhang, Jiaqian Wang, Ruiwen Zhou, Junwei Liao, Yuchen Feng, Zhuo Li","year":2026,"venue":"cs.CL","abstract":"The hallmark of human intelligence is the self-evolving ability to master new skills by learning from past experiences. However, current AI agents struggle to emulate this self-evolution: fine-tuning is computationally expensive and prone to catastrophic forgetting, while existing memory-based methods rely on passive semantic matching that often retrieves noise. To address these challenges, we propose MemRL, a non-parametric approach that evolves via reinforcement learning on episodic memory. By decoupling stable reasoning from plastic memory, MemRL employs a Two-Phase Retrieval mechanism to filter noise and identify high-utility strategies through environmental feedback. Extensive experiments on HLE, BigCodeBench, ALFWorld, and Lifelong Agent Bench demonstrate that MemRL significantly outperforms state-of-the-art baselines, confirming that MemRL effectively reconciles the stability-plasticity dilemma, enabling continuous runtime improvement without weight updates. \nCode is available at https://github.com/MemTensor/MemRL.","external_url":"https://arxiv.org/abs/2601.03192","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-21T14:34:12.910480+00:00","pith_arxiv_id":"2601.03192","created_at":"2026-05-09T22:29:06.446023+00:00","updated_at":"2026-05-21T14:34:12.910480+00:00","title_quality_ok":true,"display_title":"MemRL: Self-Evolving Agents via Runtime Reinforcement Learning on Episodic Memory","render_title":"MemRL: Self-Evolving Agents via Runtime Reinforcement Learning on Episodic Memory"},"hub":{"state":{"work_id":"0ab11b49-e934-49e1-9eaf-a25fb7343010","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":20,"external_cited_by_count":null,"distinct_field_count":5,"first_pith_cited_at":"2026-01-18T13:09:25+00:00","last_pith_cited_at":"2026-05-20T02:03:34+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-05T02:28:18.599412+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":3},{"context_role":"baseline","n":2}],"polarity_counts":[{"context_polarity":"background","n":3},{"context_polarity":"baseline","n":2}],"runs":{},"summary":{},"graph":{},"authors":[]}}