{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ICMJAEEWZWDS7ITOTOT2QZ3NPV","short_pith_number":"pith:ICMJAEEW","schema_version":"1.0","canonical_sha256":"4098901096cd872fa26e9ba7a8676d7d7455e1b145c805b1c52a6528869eca4c","source":{"kind":"arxiv","id":"2605.20833","version":1},"attestation_state":"computed","paper":{"title":"MemGym: a Long-Horizon Memory Environment for LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dimitris N. Metaxas, Han Zhang, Kai Mei, Kaiqu Liang, Mingyu Jin, Sambit Sahu, Shi-Xiong Zhang, Wenyue Hua, Wujiang Xu, Yu Wang, Zhenting Wang","submitted_at":"2026-05-20T07:25:33Z","abstract_excerpt":"Memory is a central capability for LLM agents operating across long-horizon tasks. Existing memory benchmarks predominantly evaluate retention of personalized information in multi-turn chat scenarios, overlooking the dynamic memory formation that occurs during extended agent execution. Consequently, the memory systems they produce transfer poorly to realistic agentic environments, such as coding and web navigation. We present MemGym, a benchmark for agentic memory that unifies existing agent gyms and in-house memory-grounded pipelines behind one memory-reasoning interface. MemGym spans five ev"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.20833","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-20T07:25:33Z","cross_cats_sorted":[],"title_canon_sha256":"b2f0aabf04ddd6406c7301b9f25bf154cf0dd5bc88a7b5e95b719119dcb5957b","abstract_canon_sha256":"4c8c4111ba0422eb907924f01f4278902b722c67022d47542ea576a421ab14fe"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:23.679149Z","signature_b64":"XksXkjrn+Qu1ObtoBGUAVSXfLQVzh5udQku899taSiwIEDD4RqD1U5r/USVjR3kmRcMEeTwHkli4uiNJ5MV/DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4098901096cd872fa26e9ba7a8676d7d7455e1b145c805b1c52a6528869eca4c","last_reissued_at":"2026-05-21T01:05:23.678298Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:23.678298Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MemGym: a Long-Horizon Memory Environment for LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dimitris N. Metaxas, Han Zhang, Kai Mei, Kaiqu Liang, Mingyu Jin, Sambit Sahu, Shi-Xiong Zhang, Wenyue Hua, Wujiang Xu, Yu Wang, Zhenting Wang","submitted_at":"2026-05-20T07:25:33Z","abstract_excerpt":"Memory is a central capability for LLM agents operating across long-horizon tasks. Existing memory benchmarks predominantly evaluate retention of personalized information in multi-turn chat scenarios, overlooking the dynamic memory formation that occurs during extended agent execution. Consequently, the memory systems they produce transfer poorly to realistic agentic environments, such as coding and web navigation. We present MemGym, a benchmark for agentic memory that unifies existing agent gyms and in-house memory-grounded pipelines behind one memory-reasoning interface. MemGym spans five ev"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20833","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20833/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.20833","created_at":"2026-05-21T01:05:23.678436+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.20833v1","created_at":"2026-05-21T01:05:23.678436+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20833","created_at":"2026-05-21T01:05:23.678436+00:00"},{"alias_kind":"pith_short_12","alias_value":"ICMJAEEWZWDS","created_at":"2026-05-21T01:05:23.678436+00:00"},{"alias_kind":"pith_short_16","alias_value":"ICMJAEEWZWDS7ITO","created_at":"2026-05-21T01:05:23.678436+00:00"},{"alias_kind":"pith_short_8","alias_value":"ICMJAEEW","created_at":"2026-05-21T01:05:23.678436+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV","json":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV.json","graph_json":"https://pith.science/api/pith-number/ICMJAEEWZWDS7ITOTOT2QZ3NPV/graph.json","events_json":"https://pith.science/api/pith-number/ICMJAEEWZWDS7ITOTOT2QZ3NPV/events.json","paper":"https://pith.science/paper/ICMJAEEW"},"agent_actions":{"view_html":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV","download_json":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV.json","view_paper":"https://pith.science/paper/ICMJAEEW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.20833&json=true","fetch_graph":"https://pith.science/api/pith-number/ICMJAEEWZWDS7ITOTOT2QZ3NPV/graph.json","fetch_events":"https://pith.science/api/pith-number/ICMJAEEWZWDS7ITOTOT2QZ3NPV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV/action/storage_attestation","attest_author":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV/action/author_attestation","sign_citation":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV/action/citation_signature","submit_replication":"https://pith.science/pith/ICMJAEEWZWDS7ITOTOT2QZ3NPV/action/replication_record"}},"created_at":"2026-05-21T01:05:23.678436+00:00","updated_at":"2026-05-21T01:05:23.678436+00:00"}