{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NPL33ZPQCUJ6KSP7ILOSYGNXV6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1f0afb371951d3792c25b4f9c28415ea6e7b2a0aa4a277b84ae261be92edc42e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-16T05:50:55Z","title_canon_sha256":"3b1d042c61b9817c6542734922926902e103366eb974fe1ae0c7e5f822af2a2f"},"schema_version":"1.0","source":{"id":"2606.17546","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.17546","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"arxiv_version","alias_value":"2606.17546v1","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17546","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"pith_short_12","alias_value":"NPL33ZPQCUJ6","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"pith_short_16","alias_value":"NPL33ZPQCUJ6KSP7","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"pith_short_8","alias_value":"NPL33ZPQ","created_at":"2026-06-19T16:10:15Z"}],"graph_snapshots":[{"event_id":"sha256:fb32e11db6cd5f4da3d4f70345f20aa1a185d172790171381cc12bc93ba23bf5","target":"graph","created_at":"2026-06-19T16:10:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.17546/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Self-evolving LLM-based agents improve mainly by changing their agent harness: the structured execution layer around a base model, including prompts, memory, tools, middleware, runtime state, and the model-tool interaction loop. Existing evaluations often reduce this process to isolated task scores or a single sequential curve, obscuring whether an update produces reusable improvement, overfits recent tasks, increases cost, or harms older behavior. We introduce SEAGym, an evaluation environment for measuring agent harness updates across training, validation, test, replay, and cost records. SEA","authors_text":"Bin Liang, Changshui Zhang, Chuanyi Xue, Congjie Zheng, Jun Yang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-16T05:50:55Z","title":"SEAGym: An Evaluation Environment for Self-Evolving LLM Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17546","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4024682ff9c29ee97749bb6925e36cf5c6ed6a58ea03043ec4c5a0dcd2230a82","target":"record","created_at":"2026-06-19T16:10:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1f0afb371951d3792c25b4f9c28415ea6e7b2a0aa4a277b84ae261be92edc42e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-16T05:50:55Z","title_canon_sha256":"3b1d042c61b9817c6542734922926902e103366eb974fe1ae0c7e5f822af2a2f"},"schema_version":"1.0","source":{"id":"2606.17546","kind":"arxiv","version":1}},"canonical_sha256":"6bd7bde5f01513e549ff42dd2c19b7af9f6c8e36f84b5d27c9608b2ad9c9483d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6bd7bde5f01513e549ff42dd2c19b7af9f6c8e36f84b5d27c9608b2ad9c9483d","first_computed_at":"2026-06-19T16:10:15.700938Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:10:15.700938Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ELbQtmltSDOPQkronc6QdY8OEmk90vwwesWSWQIWO2GL3glga2wk+gDuDPFCO8bRt4UF+iGtCSjkUNtbeZDaBw==","signature_status":"signed_v1","signed_at":"2026-06-19T16:10:15.701282Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.17546","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4024682ff9c29ee97749bb6925e36cf5c6ed6a58ea03043ec4c5a0dcd2230a82","sha256:fb32e11db6cd5f4da3d4f70345f20aa1a185d172790171381cc12bc93ba23bf5"],"state_sha256":"a139d29c15d22c2459f8c7ecbba740f8c12801f900e3d3ecf45f4aa1e4cea93a"}