{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MWOIKIAWF6IVDXYPJVQRWWCZES","short_pith_number":"pith:MWOIKIAW","schema_version":"1.0","canonical_sha256":"659c8520162f9151df0f4d611b58592492dfd37349ceaf2ba90448b6649e13ce","source":{"kind":"arxiv","id":"2606.13681","version":1},"attestation_state":"computed","paper":{"title":"EvoArena: Tracking Memory Evolution for Robust LLM Agents in Dynamic Environments","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anh Tuan Luu, Bowen Jiang, Bryan Hooi, Caiming Xiong, Hae Won Park, Huichi Zhou, Jiaying Wu, Jundong Xu, Jun Wang, Lei Wang, Qingchuan Li, Shuyue Stella Li, Yihuai Lan, Zhiyuan Hu","submitted_at":"2026-06-11T17:59:59Z","abstract_excerpt":"Large language model (LLM) agents have achieved strong performance on a wide range of benchmarks, yet most evaluations assume static environments. In contrast, real-world deployment is inherently dynamic, requiring agents to continually align their knowledge, skills, and behavior with changing environments and updated task conditions. To address this gap, we introduce EvoArena, a benchmark suite that models environment changes as sequences of progressive updates across terminal, software, and social domains. We further propose EvoMem, a patch-based memory paradigm that records memory evolution"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.13681","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-11T17:59:59Z","cross_cats_sorted":[],"title_canon_sha256":"6cfec68d58c4aa1b25958cd63fc7f5e386dc25b2ec82324959eaf278d9eb3601","abstract_canon_sha256":"93960af932020c96befbfc0983a1dd49d7942fab41847a43da18fcf4b3c35940"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:10:23.345860Z","signature_b64":"fjgZTAa9ZdCh+fIx3EPFm0zSIvwVEkeIHS0CwQ+3IuZVgZfEtXlRvCuWFuAvkLLND03X5MAUppR7lsidhOabDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"659c8520162f9151df0f4d611b58592492dfd37349ceaf2ba90448b6649e13ce","last_reissued_at":"2026-06-12T01:10:23.344918Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:10:23.344918Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"EvoArena: Tracking Memory Evolution for Robust LLM Agents in Dynamic Environments","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anh Tuan Luu, Bowen Jiang, Bryan Hooi, Caiming Xiong, Hae Won Park, Huichi Zhou, Jiaying Wu, Jundong Xu, Jun Wang, Lei Wang, Qingchuan Li, Shuyue Stella Li, Yihuai Lan, Zhiyuan Hu","submitted_at":"2026-06-11T17:59:59Z","abstract_excerpt":"Large language model (LLM) agents have achieved strong performance on a wide range of benchmarks, yet most evaluations assume static environments. In contrast, real-world deployment is inherently dynamic, requiring agents to continually align their knowledge, skills, and behavior with changing environments and updated task conditions. To address this gap, we introduce EvoArena, a benchmark suite that models environment changes as sequences of progressive updates across terminal, software, and social domains. We further propose EvoMem, a patch-based memory paradigm that records memory evolution"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.13681","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.13681/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.13681","created_at":"2026-06-12T01:10:23.345078+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.13681v1","created_at":"2026-06-12T01:10:23.345078+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.13681","created_at":"2026-06-12T01:10:23.345078+00:00"},{"alias_kind":"pith_short_12","alias_value":"MWOIKIAWF6IV","created_at":"2026-06-12T01:10:23.345078+00:00"},{"alias_kind":"pith_short_16","alias_value":"MWOIKIAWF6IVDXYP","created_at":"2026-06-12T01:10:23.345078+00:00"},{"alias_kind":"pith_short_8","alias_value":"MWOIKIAW","created_at":"2026-06-12T01:10:23.345078+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES","json":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES.json","graph_json":"https://pith.science/api/pith-number/MWOIKIAWF6IVDXYPJVQRWWCZES/graph.json","events_json":"https://pith.science/api/pith-number/MWOIKIAWF6IVDXYPJVQRWWCZES/events.json","paper":"https://pith.science/paper/MWOIKIAW"},"agent_actions":{"view_html":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES","download_json":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES.json","view_paper":"https://pith.science/paper/MWOIKIAW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.13681&json=true","fetch_graph":"https://pith.science/api/pith-number/MWOIKIAWF6IVDXYPJVQRWWCZES/graph.json","fetch_events":"https://pith.science/api/pith-number/MWOIKIAWF6IVDXYPJVQRWWCZES/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES/action/storage_attestation","attest_author":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES/action/author_attestation","sign_citation":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES/action/citation_signature","submit_replication":"https://pith.science/pith/MWOIKIAWF6IVDXYPJVQRWWCZES/action/replication_record"}},"created_at":"2026-06-12T01:10:23.345078+00:00","updated_at":"2026-06-12T01:10:23.345078+00:00"}