{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:72PDHB5EGPMER2QZCJ7XXKO6DM","short_pith_number":"pith:72PDHB5E","schema_version":"1.0","canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","source":{"kind":"arxiv","id":"2606.17680","version":1},"attestation_state":"computed","paper":{"title":"EnvRL: Learn from Environment Dynamics in Agentic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Hao Peng, Juanzi Li, Maosong Sun, Shuzheng Si, Songze Li, Yi Wang, Zhitong Wang","submitted_at":"2026-06-16T08:48:09Z","abstract_excerpt":"Reinforcement learning (RL) has emerged as a powerful paradigm for training Large Language Models (LLMs) as agents. However, conventional RL methods for long-horizon agentic tasks often struggle with sparse outcome rewards. Intuitively, this overlooks the rich environment dynamics information contained in rollout interaction trajectories. We argue that the interaction experience inherently serves as an implicit supervision signal, reveals the underlying transition mechanisms of the environment, and enables the agent to construct a more accurate internal model of the environment.. Therefore, in"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.17680","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T08:48:09Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"bf3ce162b9d58db7c636e3db3157f3410901459f27a16ca25a1619b15f97f3b8","abstract_canon_sha256":"c53f370898bc5802d02e5670958c08548bd86f4ad92deb3d27cfe9eb558c9811"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:19.224391Z","signature_b64":"i8HMpcoiTlUXtkcJJxRirgS8HXeBP5p+00tEWfyBzPPDkWhEI2hJZda0MJJcSf3ncxNOTKr5JZscvVNiQUZBBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","last_reissued_at":"2026-06-19T16:10:19.224028Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:19.224028Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"EnvRL: Learn from Environment Dynamics in Agentic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Hao Peng, Juanzi Li, Maosong Sun, Shuzheng Si, Songze Li, Yi Wang, Zhitong Wang","submitted_at":"2026-06-16T08:48:09Z","abstract_excerpt":"Reinforcement learning (RL) has emerged as a powerful paradigm for training Large Language Models (LLMs) as agents. However, conventional RL methods for long-horizon agentic tasks often struggle with sparse outcome rewards. Intuitively, this overlooks the rich environment dynamics information contained in rollout interaction trajectories. We argue that the interaction experience inherently serves as an implicit supervision signal, reveals the underlying transition mechanisms of the environment, and enables the agent to construct a more accurate internal model of the environment.. Therefore, in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17680","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.17680/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.17680","created_at":"2026-06-19T16:10:19.224090+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.17680v1","created_at":"2026-06-19T16:10:19.224090+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17680","created_at":"2026-06-19T16:10:19.224090+00:00"},{"alias_kind":"pith_short_12","alias_value":"72PDHB5EGPME","created_at":"2026-06-19T16:10:19.224090+00:00"},{"alias_kind":"pith_short_16","alias_value":"72PDHB5EGPMER2QZ","created_at":"2026-06-19T16:10:19.224090+00:00"},{"alias_kind":"pith_short_8","alias_value":"72PDHB5E","created_at":"2026-06-19T16:10:19.224090+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM","json":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM.json","graph_json":"https://pith.science/api/pith-number/72PDHB5EGPMER2QZCJ7XXKO6DM/graph.json","events_json":"https://pith.science/api/pith-number/72PDHB5EGPMER2QZCJ7XXKO6DM/events.json","paper":"https://pith.science/paper/72PDHB5E"},"agent_actions":{"view_html":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM","download_json":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM.json","view_paper":"https://pith.science/paper/72PDHB5E","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.17680&json=true","fetch_graph":"https://pith.science/api/pith-number/72PDHB5EGPMER2QZCJ7XXKO6DM/graph.json","fetch_events":"https://pith.science/api/pith-number/72PDHB5EGPMER2QZCJ7XXKO6DM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/action/storage_attestation","attest_author":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/action/author_attestation","sign_citation":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/action/citation_signature","submit_replication":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/action/replication_record"}},"created_at":"2026-06-19T16:10:19.224090+00:00","updated_at":"2026-06-19T16:10:19.224090+00:00"}