{"work":{"id":"2a070440-2167-4398-be97-c2d4c3ee3541","openalex_id":null,"doi":null,"arxiv_id":"2508.19828","raw_key":null,"title":"Memory-R1: Enhancing Large Language Model Agents to Manage and Utilize Memories via Reinforcement Learning","authors":null,"authors_text":"Sikuan Yan, Xiufeng Yang, Zuchao Huang, Ercong Nie, Zifeng Ding, Zonggen Li","year":2025,"venue":"cs.CL","abstract":"Large Language Models (LLMs) have demonstrated impressive capabilities across a wide range of NLP tasks, but they remain fundamentally stateless, constrained by limited context windows that hinder long-horizon reasoning. Recent efforts to address this limitation often augment LLMs with an external memory bank, yet most existing pipelines are static and heuristic-driven, lacking a learned mechanism for deciding what to store, update, or retrieve. We present Memory-R1, a reinforcement learning (RL) framework that equips LLMs with the ability to actively manage and utilize external memory through two specialized agents: a Memory Manager that learns structured operations, including ADD, UPDATE, DELETE, and NOOP; and an Answer Agent that pre-selects and reasons over relevant entries. Both agents are fine-tuned with outcome-driven RL (PPO and GRPO), enabling adaptive memory management with minimal supervision. With only 152 training QA pairs, Memory-R1 outperforms strong baselines and generalizes across diverse question types, three benchmarks (LoCoMo, MSC, LongMemEval), and multiple model scales (3B-14B).","external_url":"https://arxiv.org/abs/2508.19828","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T05:30:23.046332+00:00","pith_arxiv_id":"2508.19828","created_at":"2026-05-09T06:20:42.288540+00:00","updated_at":"2026-06-05T21:23:00.469572+00:00","title_quality_ok":true,"display_title":"Memory-R1: Enhancing Large Language Model Agents to Manage and Utilize Memories via Reinforcement Learning","render_title":"Memory-R1: Enhancing Large Language Model Agents to Manage and Utilize Memories via Reinforcement Learning"},"hub":{"state":{"work_id":"2a070440-2167-4398-be97-c2d4c3ee3541","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":46,"external_cited_by_count":null,"distinct_field_count":8,"first_pith_cited_at":"2025-08-10T16:07:32+00:00","last_pith_cited_at":"2026-05-21T21:58:10+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-09T02:54:38.990196+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":10}],"polarity_counts":[{"context_polarity":"background","n":10}],"runs":{},"summary":{},"graph":{},"authors":[]}}