{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:RX7QBDNDP4LMNBZV7DVGOHQLGN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"98c836dca69a8d56efc728236eaa7575b4c368ab0a19c8de6c8e5c2ab2ff65ea","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-09T16:23:33Z","title_canon_sha256":"a49aa9a10dfccef67588d36858d226a7720ee81b28ba216be5fd1773ca3cbe30"},"schema_version":"1.0","source":{"id":"2603.08561","kind":"arxiv","version":6}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.08561","created_at":"2026-06-10T01:09:58Z"},{"alias_kind":"arxiv_version","alias_value":"2603.08561v6","created_at":"2026-06-10T01:09:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.08561","created_at":"2026-06-10T01:09:58Z"},{"alias_kind":"pith_short_12","alias_value":"RX7QBDNDP4LM","created_at":"2026-06-10T01:09:58Z"},{"alias_kind":"pith_short_16","alias_value":"RX7QBDNDP4LMNBZV","created_at":"2026-06-10T01:09:58Z"},{"alias_kind":"pith_short_8","alias_value":"RX7QBDND","created_at":"2026-06-10T01:09:58Z"}],"graph_snapshots":[{"event_id":"sha256:13b7caa925f4b8714d75996da0e366d4223acd28915a7f77255dbb5520cafbab","target":"graph","created_at":"2026-06-10T01:09:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.08561/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Standard reinforcement learning (RL) for large language model (LLM) agents primarily optimizes extrinsic task rewards, often favoring isolated task completion over continual adaptation. This paradigm can cause premature convergence to suboptimal policies and leaves useful experience only implicitly encoded in model parameters, limiting its retrieval and reuse for future decisions. We introduce RetroAgent, an online RL framework that trains agents to master interactive environments not merely by solving tasks, but by evolving across episodes. Inspired by human retrospective self-improvement, Re","authors_text":"Wenqi Shao, Xia Hu, Xiaoying Zhang, Yipeng Zhang, Zichen Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-09T16:23:33Z","title":"RetroAgent: From Solving to Evolving via Retrospective Dual Intrinsic Feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.08561","kind":"arxiv","version":6},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:295762123f672911ac22a9c0416717855195666f5ea3a52d79637497c742d9cc","target":"record","created_at":"2026-06-10T01:09:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"98c836dca69a8d56efc728236eaa7575b4c368ab0a19c8de6c8e5c2ab2ff65ea","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-09T16:23:33Z","title_canon_sha256":"a49aa9a10dfccef67588d36858d226a7720ee81b28ba216be5fd1773ca3cbe30"},"schema_version":"1.0","source":{"id":"2603.08561","kind":"arxiv","version":6}},"canonical_sha256":"8dff008da37f16c68735f8ea671e0b337290ae6ae1b653794f1efde4f26b7c9c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8dff008da37f16c68735f8ea671e0b337290ae6ae1b653794f1efde4f26b7c9c","first_computed_at":"2026-06-10T01:09:58.193244Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-10T01:09:58.193244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3ZjX2EhGwwGgZ8avE+wXqw5KiI3+bWS05RJ+AWmB057ioKH30hdzbrH5+oZ2vjmdHIfsBCUpXzyJ8uhF4uwBCA==","signature_status":"signed_v1","signed_at":"2026-06-10T01:09:58.194381Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.08561","source_kind":"arxiv","source_version":6}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:295762123f672911ac22a9c0416717855195666f5ea3a52d79637497c742d9cc","sha256:13b7caa925f4b8714d75996da0e366d4223acd28915a7f77255dbb5520cafbab"],"state_sha256":"efa2781476a180094978956f0a8839baea3afb2debeb08fd44a972f5491051dd"}