{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:YTJJXWC4HXVEG23P7X3BFYPILP","short_pith_number":"pith:YTJJXWC4","canonical_record":{"source":{"id":"2604.15774","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-17T07:29:52Z","cross_cats_sorted":[],"title_canon_sha256":"70f246af957e25df222467d9aa1bfeb9451a4ca0ee4974bcecddfbd4c7072abf","abstract_canon_sha256":"ae9f0c8aee4c5a3d35f4f5fbc67d57b8c10684f2cc7ca1a8bb9be6c130951780"},"schema_version":"1.0"},"canonical_sha256":"c4d29bd85c3dea436b6ffdf612e1e85bf51ab9afe7c39cc7e13ec914e2f4561f","source":{"kind":"arxiv","id":"2604.15774","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.15774","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"arxiv_version","alias_value":"2604.15774v2","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.15774","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"pith_short_12","alias_value":"YTJJXWC4HXVE","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"pith_short_16","alias_value":"YTJJXWC4HXVEG23P","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"pith_short_8","alias_value":"YTJJXWC4","created_at":"2026-05-22T01:04:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:YTJJXWC4HXVEG23P7X3BFYPILP","target":"record","payload":{"canonical_record":{"source":{"id":"2604.15774","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-17T07:29:52Z","cross_cats_sorted":[],"title_canon_sha256":"70f246af957e25df222467d9aa1bfeb9451a4ca0ee4974bcecddfbd4c7072abf","abstract_canon_sha256":"ae9f0c8aee4c5a3d35f4f5fbc67d57b8c10684f2cc7ca1a8bb9be6c130951780"},"schema_version":"1.0"},"canonical_sha256":"c4d29bd85c3dea436b6ffdf612e1e85bf51ab9afe7c39cc7e13ec914e2f4561f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:02.552796Z","signature_b64":"naaybD/91wL1svpNGhVlEeAVZIx8mfSCgboYzT/W5ALB4QuwakUG7wonpr+lwWNZP8WGVAHtP9DgVPzh7tqhAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c4d29bd85c3dea436b6ffdf612e1e85bf51ab9afe7c39cc7e13ec914e2f4561f","last_reissued_at":"2026-05-22T01:04:02.552031Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:02.552031Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.15774","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TEzh+9FWoB4bXplwnxZnXnrk/w+hKm7552sgExtoQ/Ek3hgPnUaRkL5Molev4uNPZCwcDWbFYrD3vhmDQ6ogBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T19:28:52.646676Z"},"content_sha256":"b15d6ecd2c32f885a86b3e3998d57fa0267a361cec96572e1abf6d95afa06b98","schema_version":"1.0","event_id":"sha256:b15d6ecd2c32f885a86b3e3998d57fa0267a361cec96572e1abf6d95afa06b98"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:YTJJXWC4HXVEG23P7X3BFYPILP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MemEvoBench: Benchmarking Safety Risks from Memory Misevolution in LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Biased memory updates cause substantial safety degradation in LLM agents.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fan Zhang, Junchi Yan, Lizhuang Ma, Qibing Ren, Shaoxiong Guo, Tian Xia, Weiwei Xie, Xue Yang","submitted_at":"2026-04-17T07:29:52Z","abstract_excerpt":"Equipping Large Language Models (LLMs) with persistent memory enhances interaction continuity and personalization but introduces new safety risks. Specifically, contaminated or biased memory accumulation can trigger abnormal agent behaviors. Existing evaluation methods have not yet established a standardized framework for measuring memory misevolution. This phenomenon refers to the gradual behavioral drift resulting from repeated exposure to misleading information. To address this gap, we introduce MemEvoBench, the first benchmark evaluating long-horizon memory safety in LLM agents against adv"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on representative models reveal substantial safety degradation under biased memory updates. Our analysis suggests that memory evolution is a significant contributor to these failures. Furthermore, static prompt-based defenses prove insufficient.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the constructed mixed benign and misleading memory pools in multi-round interactions accurately simulate real-world memory evolution and its safety impacts in deployed LLM agents.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"MemEvoBench is the first benchmark for long-horizon memory safety in LLM agents, using QA tasks across 7 domains and 36 risks plus workflow tasks with noisy tools to measure behavioral drift from biased memory updates.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Biased memory updates cause substantial safety degradation in LLM agents.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"630b13afd70cfd254270074f27a181d5be59ec630f94e627b74402176614a4d8"},"source":{"id":"2604.15774","kind":"arxiv","version":2},"verdict":{"id":"5d6dc5a5-4eeb-4269-b271-64e4413c03ed","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T08:33:04.427000Z","strongest_claim":"Experiments on representative models reveal substantial safety degradation under biased memory updates. Our analysis suggests that memory evolution is a significant contributor to these failures. Furthermore, static prompt-based defenses prove insufficient.","one_line_summary":"MemEvoBench is the first benchmark for long-horizon memory safety in LLM agents, using QA tasks across 7 domains and 36 risks plus workflow tasks with noisy tools to measure behavioral drift from biased memory updates.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the constructed mixed benign and misleading memory pools in multi-round interactions accurately simulate real-world memory evolution and its safety impacts in deployed LLM agents.","pith_extraction_headline":"Biased memory updates cause substantial safety degradation in LLM agents."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.15774/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"5d6dc5a5-4eeb-4269-b271-64e4413c03ed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mqFses1afQOsfnIIAZYanJ6ydvnUfI0o8DdvBHwZ9ihjJjCEkwJizo8uVmlBY6iEVEffg7HuNFomUWq5bw1gCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T19:28:52.647113Z"},"content_sha256":"fabcde5afd67d0a2f8b35525f77fc8fa6ccc0d51dd078c445e17ed933220c3b7","schema_version":"1.0","event_id":"sha256:fabcde5afd67d0a2f8b35525f77fc8fa6ccc0d51dd078c445e17ed933220c3b7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YTJJXWC4HXVEG23P7X3BFYPILP/bundle.json","state_url":"https://pith.science/pith/YTJJXWC4HXVEG23P7X3BFYPILP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YTJJXWC4HXVEG23P7X3BFYPILP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T19:28:52Z","links":{"resolver":"https://pith.science/pith/YTJJXWC4HXVEG23P7X3BFYPILP","bundle":"https://pith.science/pith/YTJJXWC4HXVEG23P7X3BFYPILP/bundle.json","state":"https://pith.science/pith/YTJJXWC4HXVEG23P7X3BFYPILP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YTJJXWC4HXVEG23P7X3BFYPILP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YTJJXWC4HXVEG23P7X3BFYPILP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ae9f0c8aee4c5a3d35f4f5fbc67d57b8c10684f2cc7ca1a8bb9be6c130951780","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-17T07:29:52Z","title_canon_sha256":"70f246af957e25df222467d9aa1bfeb9451a4ca0ee4974bcecddfbd4c7072abf"},"schema_version":"1.0","source":{"id":"2604.15774","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.15774","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"arxiv_version","alias_value":"2604.15774v2","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.15774","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"pith_short_12","alias_value":"YTJJXWC4HXVE","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"pith_short_16","alias_value":"YTJJXWC4HXVEG23P","created_at":"2026-05-22T01:04:02Z"},{"alias_kind":"pith_short_8","alias_value":"YTJJXWC4","created_at":"2026-05-22T01:04:02Z"}],"graph_snapshots":[{"event_id":"sha256:fabcde5afd67d0a2f8b35525f77fc8fa6ccc0d51dd078c445e17ed933220c3b7","target":"graph","created_at":"2026-05-22T01:04:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments on representative models reveal substantial safety degradation under biased memory updates. Our analysis suggests that memory evolution is a significant contributor to these failures. Furthermore, static prompt-based defenses prove insufficient."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the constructed mixed benign and misleading memory pools in multi-round interactions accurately simulate real-world memory evolution and its safety impacts in deployed LLM agents."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"MemEvoBench is the first benchmark for long-horizon memory safety in LLM agents, using QA tasks across 7 domains and 36 risks plus workflow tasks with noisy tools to measure behavioral drift from biased memory updates."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Biased memory updates cause substantial safety degradation in LLM agents."}],"snapshot_sha256":"630b13afd70cfd254270074f27a181d5be59ec630f94e627b74402176614a4d8"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.15774/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Equipping Large Language Models (LLMs) with persistent memory enhances interaction continuity and personalization but introduces new safety risks. Specifically, contaminated or biased memory accumulation can trigger abnormal agent behaviors. Existing evaluation methods have not yet established a standardized framework for measuring memory misevolution. This phenomenon refers to the gradual behavioral drift resulting from repeated exposure to misleading information. To address this gap, we introduce MemEvoBench, the first benchmark evaluating long-horizon memory safety in LLM agents against adv","authors_text":"Fan Zhang, Junchi Yan, Lizhuang Ma, Qibing Ren, Shaoxiong Guo, Tian Xia, Weiwei Xie, Xue Yang","cross_cats":[],"headline":"Biased memory updates cause substantial safety degradation in LLM agents.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-17T07:29:52Z","title":"MemEvoBench: Benchmarking Safety Risks from Memory Misevolution in LLM Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.15774","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T08:33:04.427000Z","id":"5d6dc5a5-4eeb-4269-b271-64e4413c03ed","model_set":{"reader":"grok-4.3"},"one_line_summary":"MemEvoBench is the first benchmark for long-horizon memory safety in LLM agents, using QA tasks across 7 domains and 36 risks plus workflow tasks with noisy tools to measure behavioral drift from biased memory updates.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Biased memory updates cause substantial safety degradation in LLM agents.","strongest_claim":"Experiments on representative models reveal substantial safety degradation under biased memory updates. Our analysis suggests that memory evolution is a significant contributor to these failures. Furthermore, static prompt-based defenses prove insufficient.","weakest_assumption":"That the constructed mixed benign and misleading memory pools in multi-round interactions accurately simulate real-world memory evolution and its safety impacts in deployed LLM agents."}},"verdict_id":"5d6dc5a5-4eeb-4269-b271-64e4413c03ed"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b15d6ecd2c32f885a86b3e3998d57fa0267a361cec96572e1abf6d95afa06b98","target":"record","created_at":"2026-05-22T01:04:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ae9f0c8aee4c5a3d35f4f5fbc67d57b8c10684f2cc7ca1a8bb9be6c130951780","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-17T07:29:52Z","title_canon_sha256":"70f246af957e25df222467d9aa1bfeb9451a4ca0ee4974bcecddfbd4c7072abf"},"schema_version":"1.0","source":{"id":"2604.15774","kind":"arxiv","version":2}},"canonical_sha256":"c4d29bd85c3dea436b6ffdf612e1e85bf51ab9afe7c39cc7e13ec914e2f4561f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c4d29bd85c3dea436b6ffdf612e1e85bf51ab9afe7c39cc7e13ec914e2f4561f","first_computed_at":"2026-05-22T01:04:02.552031Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:02.552031Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"naaybD/91wL1svpNGhVlEeAVZIx8mfSCgboYzT/W5ALB4QuwakUG7wonpr+lwWNZP8WGVAHtP9DgVPzh7tqhAA==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:02.552796Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.15774","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b15d6ecd2c32f885a86b3e3998d57fa0267a361cec96572e1abf6d95afa06b98","sha256:fabcde5afd67d0a2f8b35525f77fc8fa6ccc0d51dd078c445e17ed933220c3b7"],"state_sha256":"640b599828b069a6d195b3235bea5bbcab08271c26cd1526b8fd56b8c5376171"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2e9LLWPxErnhdw2D8WpFM1t8us01QABI24Q6exXldeHfNrHoX8AFNok6zJoRG3TUlXKkRiz2LIILCiyv93qICg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T19:28:52.649434Z","bundle_sha256":"b5bf951fddbb1d74c888fdfcce82b8052f99ddcdc6b763e82bbcf390932be912"}}