{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:ATSZCHBVBU5N5BQBECGSBOTGEJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b8d22a991687452f8586bcb2dc0cbf565ff6b4cfdefd487891d294548a8d0fd9","cross_cats_sorted":["cs.CL","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-10-09T17:59:17Z","title_canon_sha256":"8522a5493521db03af60add728d52592abf69f4bfb9a1dadda2a04a91ffde055"},"schema_version":"1.0","source":{"id":"2510.08558","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.08558","created_at":"2026-05-26T02:03:57Z"},{"alias_kind":"arxiv_version","alias_value":"2510.08558v3","created_at":"2026-05-26T02:03:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.08558","created_at":"2026-05-26T02:03:57Z"},{"alias_kind":"pith_short_12","alias_value":"ATSZCHBVBU5N","created_at":"2026-05-26T02:03:57Z"},{"alias_kind":"pith_short_16","alias_value":"ATSZCHBVBU5N5BQB","created_at":"2026-05-26T02:03:57Z"},{"alias_kind":"pith_short_8","alias_value":"ATSZCHBV","created_at":"2026-05-26T02:03:57Z"}],"graph_snapshots":[{"event_id":"sha256:830cde3d1582fdb9f515df3c71d1b9976fc8a96a890fd76d307c9ce727377a98","target":"graph","created_at":"2026-05-26T02:03:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.08558/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"A long-term goal of language agents is to learn and improve through their own experience, ultimately outperforming humans in complex, real-world tasks. However, training agents from experience data with reinforcement learning remains difficult in many environments, which either lack verifiable rewards (e.g., websites) or require inefficient long-horizon rollouts (e.g., multi-turn tool use). As a result, most current agents rely on supervised fine-tuning on expert data, which is challenging to scale and generalizes poorly. This limitation stems from the nature of expert demonstrations: they cap","authors_text":"Ashish Shah, Bo Liu, Boyu Gou, Dat Huynh, Hengduo Li, Huan Sun, Jason Weston, Jiacheng Zhu, Jianwei Yang, Jian Xie, Kai Zhang, Lawrence Jang, Ning Zhang, Qi Qi, Sara Cao, Shuyan Zhou, Tianci Xue, Xiangchao Chen, Xian Li, Xiaohan Fu, Xiyao Wang, Yifan Wu, Yu Su, Yuting Ning, Yuxuan Sun, Zeyi Liao, Zhaorun Chen, Zhihan Liu, Zihang Meng, Zi Yang","cross_cats":["cs.CL","cs.IR","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-10-09T17:59:17Z","title":"Agent Learning via Early Experience"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.08558","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:65ddd88c39eb6356c6d2aa11012f9ea486316e24238d81eeaa264377f9190028","target":"record","created_at":"2026-05-26T02:03:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b8d22a991687452f8586bcb2dc0cbf565ff6b4cfdefd487891d294548a8d0fd9","cross_cats_sorted":["cs.CL","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-10-09T17:59:17Z","title_canon_sha256":"8522a5493521db03af60add728d52592abf69f4bfb9a1dadda2a04a91ffde055"},"schema_version":"1.0","source":{"id":"2510.08558","kind":"arxiv","version":3}},"canonical_sha256":"04e5911c350d3ade8601208d20ba66227b7e87667749c5f06ea002e67a1a6fb7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"04e5911c350d3ade8601208d20ba66227b7e87667749c5f06ea002e67a1a6fb7","first_computed_at":"2026-05-26T02:03:57.703847Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:03:57.703847Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HBQ8qy3MRRUb7fQabK31Wytt+G/Bc1gSTSAEw3YpvdNvLpF3HMfCkWE6gjdslyQwXTbWi0YAWKoyWFwC2LLZDQ==","signature_status":"signed_v1","signed_at":"2026-05-26T02:03:57.704852Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.08558","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:65ddd88c39eb6356c6d2aa11012f9ea486316e24238d81eeaa264377f9190028","sha256:830cde3d1582fdb9f515df3c71d1b9976fc8a96a890fd76d307c9ce727377a98"],"state_sha256":"af40654b0e31b79b3f1f42cd523017a58c473a0aab740889e284ecbd93ed5007"}