{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:RZ7367QXH25YUOKYP43PJPHK2O","short_pith_number":"pith:RZ7367QX","canonical_record":{"source":{"id":"2511.20857","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-25T21:08:07Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"0d818bd916402e6653c573575779d522ab3b6ccd03e7edcf7f20c510c04d1e7e","abstract_canon_sha256":"91510619ee77a1e8bb1dbe58ace5f7ed30ee2190b6f2fa018506d5ca6f3c0544"},"schema_version":"1.0"},"canonical_sha256":"8e7fbf7e173ebb8a39587f36f4bcead394069ddcc3d7107926de73ed48948a0c","source":{"kind":"arxiv","id":"2511.20857","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.20857","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"arxiv_version","alias_value":"2511.20857v1","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.20857","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"pith_short_12","alias_value":"RZ7367QXH25Y","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"RZ7367QXH25YUOKY","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"RZ7367QX","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:RZ7367QXH25YUOKYP43PJPHK2O","target":"record","payload":{"canonical_record":{"source":{"id":"2511.20857","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-25T21:08:07Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"0d818bd916402e6653c573575779d522ab3b6ccd03e7edcf7f20c510c04d1e7e","abstract_canon_sha256":"91510619ee77a1e8bb1dbe58ace5f7ed30ee2190b6f2fa018506d5ca6f3c0544"},"schema_version":"1.0"},"canonical_sha256":"8e7fbf7e173ebb8a39587f36f4bcead394069ddcc3d7107926de73ed48948a0c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:19.897856Z","signature_b64":"N2Dh92hhcQCcpTGW7kj9eE9Vs7ANziGrEmsbXs95gEmQ/Jye9lIwXnMY7KHA4bzOMSUcZdRzSwWwjGBX/R4sDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8e7fbf7e173ebb8a39587f36f4bcead394069ddcc3d7107926de73ed48948a0c","last_reissued_at":"2026-05-17T23:39:19.896925Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:19.896925Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2511.20857","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nuGclTZSAL8Myt5d86tJD6oDvQnyuSsNyVJIMToh2dks5foyeVy6QDA/afK/jSQBeQeJ5ATTOK0leV6tTQnCBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:17:24.683579Z"},"content_sha256":"88bc16b20e31ba586f88adef513ad079ce9b8e857b5530e4ddac7c83adc8bc63","schema_version":"1.0","event_id":"sha256:88bc16b20e31ba586f88adef513ad079ce9b8e857b5530e4ddac7c83adc8bc63"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:RZ7367QXH25YUOKYP43PJPHK2O","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Evo-Memory: Benchmarking LLM Agent Test-time Learning with Self-Evolving Memory","license":"http://creativecommons.org/licenses/by/4.0/","headline":"LLM agents achieve continual improvement on streaming tasks by using the ReMem pipeline to integrate reasoning, actions, and memory updates.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Benjamin Coleman, Chi Wang, Derek Zhiyuan Cheng, Ed H. Chi, Fernando Pereira, Jingrui He, Mengting Ai, Noveen Sachdeva, Shuo Chen, Tianxin Wei, Wang-Cheng Kang, Xuying Ning, Yuanchen Bei, Yunzhe Li, Zhankui He","submitted_at":"2025-11-25T21:08:07Z","abstract_excerpt":"Statefulness is essential for large language model (LLM) agents to perform long-term planning and problem-solving. This makes memory a critical component, yet its management and evolution remain largely underexplored. Existing evaluations mostly focus on static conversational settings, where memory is passively retrieved from dialogue to answer queries, overlooking the dynamic ability to accumulate and reuse experience across evolving task streams. In real-world environments such as interactive problem assistants or embodied agents, LLMs are required to handle continuous task streams, yet ofte"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"ReMem, an action-think-memory refine pipeline, tightly integrates reasoning, task actions, and memory updates to achieve continual improvement in LLM agents on streaming tasks.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the chosen sequential task streams and the implemented memory modules faithfully capture the dynamics of real-world continuous interactions where memory evolution is required, without hidden implementation biases affecting the comparisons.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Evo-Memory is a new benchmark for self-evolving memory in LLM agents across task streams, with baseline ExpRAG and proposed ReMem method that integrates reasoning, actions, and memory updates for continual improvement.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LLM agents achieve continual improvement on streaming tasks by using the ReMem pipeline to integrate reasoning, actions, and memory updates.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"0ea7554e0285eee97172779bf5efa5883de6325821b2fa6e9aa4987c291f81aa"},"source":{"id":"2511.20857","kind":"arxiv","version":1},"verdict":{"id":"af7e0477-7a68-4bdb-a637-7436043acc6f","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T23:08:31.892284Z","strongest_claim":"ReMem, an action-think-memory refine pipeline, tightly integrates reasoning, task actions, and memory updates to achieve continual improvement in LLM agents on streaming tasks.","one_line_summary":"Evo-Memory is a new benchmark for self-evolving memory in LLM agents across task streams, with baseline ExpRAG and proposed ReMem method that integrates reasoning, actions, and memory updates for continual improvement.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the chosen sequential task streams and the implemented memory modules faithfully capture the dynamics of real-world continuous interactions where memory evolution is required, without hidden implementation biases affecting the comparisons.","pith_extraction_headline":"LLM agents achieve continual improvement on streaming tasks by using the ReMem pipeline to integrate reasoning, actions, and memory updates."},"references":{"count":299,"sample":[{"doi":"","year":2009,"title":"Measuring Massive Multitask Language Understanding","work_id":"e87ec49a-544b-4ec8-8991-75298c64ff5e","ref_index":1,"cited_arxiv_id":"2009.03300","is_internal_anchor":true},{"doi":"","year":null,"title":"International Conference on Learning Representations (ICLR) , year=","work_id":"1852f1a8-2303-4108-a8a5-0562f7716a9f","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in Neural Information Processing Systems (NeurIPS) , year=","work_id":"0cb97455-c4bf-4962-a363-31b7fd9dc41b","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in Neural Information Processing Systems (NeurIPS) , year=","work_id":"fda20f90-227f-46ae-9d68-9c841c704211","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"International Conference on Machine Learning (ICML) , year=","work_id":"98f812e7-24ab-4f7b-a3df-b17d84a7b2e4","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":299,"snapshot_sha256":"053a4e2e41893da11f3db45f136b055dc708cc66c3dd725bf6e47a3ff4a38303","internal_anchors":36},"formal_canon":{"evidence_count":1,"snapshot_sha256":"ab70ab64680b2b0ec733a08592583ffb6ace64537130a4bf27dc69f776abcc09"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"af7e0477-7a68-4bdb-a637-7436043acc6f"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7Iq3fSo941ExHH61UcYMNiCHTpstoy3FFd7dNG8gxyBhxlq5tJbRzC6WqQuKdU37AJIva2Excp9AakOOoy3PDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:17:24.684257Z"},"content_sha256":"a559ab9696cfa89bd8a5701c2992e2aff01d7fb956d9e9072567874b65de4b08","schema_version":"1.0","event_id":"sha256:a559ab9696cfa89bd8a5701c2992e2aff01d7fb956d9e9072567874b65de4b08"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RZ7367QXH25YUOKYP43PJPHK2O/bundle.json","state_url":"https://pith.science/pith/RZ7367QXH25YUOKYP43PJPHK2O/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RZ7367QXH25YUOKYP43PJPHK2O/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:17:24Z","links":{"resolver":"https://pith.science/pith/RZ7367QXH25YUOKYP43PJPHK2O","bundle":"https://pith.science/pith/RZ7367QXH25YUOKYP43PJPHK2O/bundle.json","state":"https://pith.science/pith/RZ7367QXH25YUOKYP43PJPHK2O/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RZ7367QXH25YUOKYP43PJPHK2O/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:RZ7367QXH25YUOKYP43PJPHK2O","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"91510619ee77a1e8bb1dbe58ace5f7ed30ee2190b6f2fa018506d5ca6f3c0544","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-25T21:08:07Z","title_canon_sha256":"0d818bd916402e6653c573575779d522ab3b6ccd03e7edcf7f20c510c04d1e7e"},"schema_version":"1.0","source":{"id":"2511.20857","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.20857","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"arxiv_version","alias_value":"2511.20857v1","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.20857","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"pith_short_12","alias_value":"RZ7367QXH25Y","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"RZ7367QXH25YUOKY","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"RZ7367QX","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:a559ab9696cfa89bd8a5701c2992e2aff01d7fb956d9e9072567874b65de4b08","target":"graph","created_at":"2026-05-17T23:39:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ReMem, an action-think-memory refine pipeline, tightly integrates reasoning, task actions, and memory updates to achieve continual improvement in LLM agents on streaming tasks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the chosen sequential task streams and the implemented memory modules faithfully capture the dynamics of real-world continuous interactions where memory evolution is required, without hidden implementation biases affecting the comparisons."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Evo-Memory is a new benchmark for self-evolving memory in LLM agents across task streams, with baseline ExpRAG and proposed ReMem method that integrates reasoning, actions, and memory updates for continual improvement."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"LLM agents achieve continual improvement on streaming tasks by using the ReMem pipeline to integrate reasoning, actions, and memory updates."}],"snapshot_sha256":"0ea7554e0285eee97172779bf5efa5883de6325821b2fa6e9aa4987c291f81aa"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"ab70ab64680b2b0ec733a08592583ffb6ace64537130a4bf27dc69f776abcc09"},"paper":{"abstract_excerpt":"Statefulness is essential for large language model (LLM) agents to perform long-term planning and problem-solving. This makes memory a critical component, yet its management and evolution remain largely underexplored. Existing evaluations mostly focus on static conversational settings, where memory is passively retrieved from dialogue to answer queries, overlooking the dynamic ability to accumulate and reuse experience across evolving task streams. In real-world environments such as interactive problem assistants or embodied agents, LLMs are required to handle continuous task streams, yet ofte","authors_text":"Benjamin Coleman, Chi Wang, Derek Zhiyuan Cheng, Ed H. Chi, Fernando Pereira, Jingrui He, Mengting Ai, Noveen Sachdeva, Shuo Chen, Tianxin Wei, Wang-Cheng Kang, Xuying Ning, Yuanchen Bei, Yunzhe Li, Zhankui He","cross_cats":["cs.AI"],"headline":"LLM agents achieve continual improvement on streaming tasks by using the ReMem pipeline to integrate reasoning, actions, and memory updates.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-25T21:08:07Z","title":"Evo-Memory: Benchmarking LLM Agent Test-time Learning with Self-Evolving Memory"},"references":{"count":299,"internal_anchors":36,"resolved_work":299,"sample":[{"cited_arxiv_id":"2009.03300","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Measuring Massive Multitask Language Understanding","work_id":"e87ec49a-544b-4ec8-8991-75298c64ff5e","year":2009},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"International Conference on Learning Representations (ICLR) , year=","work_id":"1852f1a8-2303-4108-a8a5-0562f7716a9f","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Advances in Neural Information Processing Systems (NeurIPS) , year=","work_id":"0cb97455-c4bf-4962-a363-31b7fd9dc41b","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Advances in Neural Information Processing Systems (NeurIPS) , year=","work_id":"fda20f90-227f-46ae-9d68-9c841c704211","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"International Conference on Machine Learning (ICML) , year=","work_id":"98f812e7-24ab-4f7b-a3df-b17d84a7b2e4","year":null}],"snapshot_sha256":"053a4e2e41893da11f3db45f136b055dc708cc66c3dd725bf6e47a3ff4a38303"},"source":{"id":"2511.20857","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T23:08:31.892284Z","id":"af7e0477-7a68-4bdb-a637-7436043acc6f","model_set":{"reader":"grok-4.3"},"one_line_summary":"Evo-Memory is a new benchmark for self-evolving memory in LLM agents across task streams, with baseline ExpRAG and proposed ReMem method that integrates reasoning, actions, and memory updates for continual improvement.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"LLM agents achieve continual improvement on streaming tasks by using the ReMem pipeline to integrate reasoning, actions, and memory updates.","strongest_claim":"ReMem, an action-think-memory refine pipeline, tightly integrates reasoning, task actions, and memory updates to achieve continual improvement in LLM agents on streaming tasks.","weakest_assumption":"That the chosen sequential task streams and the implemented memory modules faithfully capture the dynamics of real-world continuous interactions where memory evolution is required, without hidden implementation biases affecting the comparisons."}},"verdict_id":"af7e0477-7a68-4bdb-a637-7436043acc6f"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:88bc16b20e31ba586f88adef513ad079ce9b8e857b5530e4ddac7c83adc8bc63","target":"record","created_at":"2026-05-17T23:39:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"91510619ee77a1e8bb1dbe58ace5f7ed30ee2190b6f2fa018506d5ca6f3c0544","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-25T21:08:07Z","title_canon_sha256":"0d818bd916402e6653c573575779d522ab3b6ccd03e7edcf7f20c510c04d1e7e"},"schema_version":"1.0","source":{"id":"2511.20857","kind":"arxiv","version":1}},"canonical_sha256":"8e7fbf7e173ebb8a39587f36f4bcead394069ddcc3d7107926de73ed48948a0c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8e7fbf7e173ebb8a39587f36f4bcead394069ddcc3d7107926de73ed48948a0c","first_computed_at":"2026-05-17T23:39:19.896925Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:19.896925Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"N2Dh92hhcQCcpTGW7kj9eE9Vs7ANziGrEmsbXs95gEmQ/Jye9lIwXnMY7KHA4bzOMSUcZdRzSwWwjGBX/R4sDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:19.897856Z","signed_message":"canonical_sha256_bytes"},"source_id":"2511.20857","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:88bc16b20e31ba586f88adef513ad079ce9b8e857b5530e4ddac7c83adc8bc63","sha256:a559ab9696cfa89bd8a5701c2992e2aff01d7fb956d9e9072567874b65de4b08"],"state_sha256":"366af46f3262defdacc2b1676ce49e2057a47b50e5aec7589ca130711b75c2dd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uDL2RmlntjeWVnLFVlLsNJFbiL/WUfndtw8x0kHRzwTnU8V0kpsQPwxrXICrSP2kuhqg2Cy1Gi8j3sMNGoRzDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:17:24.687944Z","bundle_sha256":"1647404896bf7cd403ed5e291c1cd2357c52bda54d2bb0fb1a67c33768f25b2b"}}