{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:72PDHB5EGPMER2QZCJ7XXKO6DM","short_pith_number":"pith:72PDHB5E","canonical_record":{"source":{"id":"2606.17680","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T08:48:09Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"bf3ce162b9d58db7c636e3db3157f3410901459f27a16ca25a1619b15f97f3b8","abstract_canon_sha256":"c53f370898bc5802d02e5670958c08548bd86f4ad92deb3d27cfe9eb558c9811"},"schema_version":"1.0"},"canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","source":{"kind":"arxiv","id":"2606.17680","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.17680","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"arxiv_version","alias_value":"2606.17680v1","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17680","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"pith_short_12","alias_value":"72PDHB5EGPME","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"pith_short_16","alias_value":"72PDHB5EGPMER2QZ","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"pith_short_8","alias_value":"72PDHB5E","created_at":"2026-06-19T16:10:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:72PDHB5EGPMER2QZCJ7XXKO6DM","target":"record","payload":{"canonical_record":{"source":{"id":"2606.17680","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T08:48:09Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"bf3ce162b9d58db7c636e3db3157f3410901459f27a16ca25a1619b15f97f3b8","abstract_canon_sha256":"c53f370898bc5802d02e5670958c08548bd86f4ad92deb3d27cfe9eb558c9811"},"schema_version":"1.0"},"canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:19.224391Z","signature_b64":"i8HMpcoiTlUXtkcJJxRirgS8HXeBP5p+00tEWfyBzPPDkWhEI2hJZda0MJJcSf3ncxNOTKr5JZscvVNiQUZBBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","last_reissued_at":"2026-06-19T16:10:19.224028Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:19.224028Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.17680","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:10:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dPIEm3OaCPrDbeSienxCqufTicumDkE3Sxo0D2EBSXptVh/JwbdC9+7jnOp7YcHqtK4j91h38iPFliAnMQaNDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T06:17:40.862483Z"},"content_sha256":"82e97a4881641d80d6b7ff69dd60cb7f659b7f53822d3c7b4a29f134e51109a3","schema_version":"1.0","event_id":"sha256:82e97a4881641d80d6b7ff69dd60cb7f659b7f53822d3c7b4a29f134e51109a3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:72PDHB5EGPMER2QZCJ7XXKO6DM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"EnvRL: Learn from Environment Dynamics in Agentic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Hao Peng, Juanzi Li, Maosong Sun, Shuzheng Si, Songze Li, Yi Wang, Zhitong Wang","submitted_at":"2026-06-16T08:48:09Z","abstract_excerpt":"Reinforcement learning (RL) has emerged as a powerful paradigm for training Large Language Models (LLMs) as agents. However, conventional RL methods for long-horizon agentic tasks often struggle with sparse outcome rewards. Intuitively, this overlooks the rich environment dynamics information contained in rollout interaction trajectories. We argue that the interaction experience inherently serves as an implicit supervision signal, reveals the underlying transition mechanisms of the environment, and enables the agent to construct a more accurate internal model of the environment.. Therefore, in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17680","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.17680/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:10:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"k2oVO0H0ZvIrsemfSTkyZ1Itb2sN9EznKVOZYxsj6oxLbEnODLHuCZiElBT6ps9dzetZi87qIoPYqokiUeNFDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T06:17:40.862852Z"},"content_sha256":"24cf191c0144dc0c80fef921ebcb26c120174116b40c8129f12f78acea1bd831","schema_version":"1.0","event_id":"sha256:24cf191c0144dc0c80fef921ebcb26c120174116b40c8129f12f78acea1bd831"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/bundle.json","state_url":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T06:17:40Z","links":{"resolver":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM","bundle":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/bundle.json","state":"https://pith.science/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/72PDHB5EGPMER2QZCJ7XXKO6DM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:72PDHB5EGPMER2QZCJ7XXKO6DM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c53f370898bc5802d02e5670958c08548bd86f4ad92deb3d27cfe9eb558c9811","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T08:48:09Z","title_canon_sha256":"bf3ce162b9d58db7c636e3db3157f3410901459f27a16ca25a1619b15f97f3b8"},"schema_version":"1.0","source":{"id":"2606.17680","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.17680","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"arxiv_version","alias_value":"2606.17680v1","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17680","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"pith_short_12","alias_value":"72PDHB5EGPME","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"pith_short_16","alias_value":"72PDHB5EGPMER2QZ","created_at":"2026-06-19T16:10:19Z"},{"alias_kind":"pith_short_8","alias_value":"72PDHB5E","created_at":"2026-06-19T16:10:19Z"}],"graph_snapshots":[{"event_id":"sha256:24cf191c0144dc0c80fef921ebcb26c120174116b40c8129f12f78acea1bd831","target":"graph","created_at":"2026-06-19T16:10:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.17680/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has emerged as a powerful paradigm for training Large Language Models (LLMs) as agents. However, conventional RL methods for long-horizon agentic tasks often struggle with sparse outcome rewards. Intuitively, this overlooks the rich environment dynamics information contained in rollout interaction trajectories. We argue that the interaction experience inherently serves as an implicit supervision signal, reveals the underlying transition mechanisms of the environment, and enables the agent to construct a more accurate internal model of the environment.. Therefore, in","authors_text":"Hao Peng, Juanzi Li, Maosong Sun, Shuzheng Si, Songze Li, Yi Wang, Zhitong Wang","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T08:48:09Z","title":"EnvRL: Learn from Environment Dynamics in Agentic Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17680","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:82e97a4881641d80d6b7ff69dd60cb7f659b7f53822d3c7b4a29f134e51109a3","target":"record","created_at":"2026-06-19T16:10:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c53f370898bc5802d02e5670958c08548bd86f4ad92deb3d27cfe9eb558c9811","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T08:48:09Z","title_canon_sha256":"bf3ce162b9d58db7c636e3db3157f3410901459f27a16ca25a1619b15f97f3b8"},"schema_version":"1.0","source":{"id":"2606.17680","kind":"arxiv","version":1}},"canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fe9e3387a433d848ea19127f7ba9de1b33030fdc3119d040fb08fcd7c8dea496","first_computed_at":"2026-06-19T16:10:19.224028Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:10:19.224028Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"i8HMpcoiTlUXtkcJJxRirgS8HXeBP5p+00tEWfyBzPPDkWhEI2hJZda0MJJcSf3ncxNOTKr5JZscvVNiQUZBBA==","signature_status":"signed_v1","signed_at":"2026-06-19T16:10:19.224391Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.17680","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:82e97a4881641d80d6b7ff69dd60cb7f659b7f53822d3c7b4a29f134e51109a3","sha256:24cf191c0144dc0c80fef921ebcb26c120174116b40c8129f12f78acea1bd831"],"state_sha256":"221a5e2eb4d6a32d5adc1d87ea594267fd23629c01e118061b15dfef1c674452"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2sS8/Jj5WDDk80kCq1Ym7GjcWlYbM5CbWjQv0WLBQsjioUqOyGMiOpMfqm3zWor+bJo/vMaEI6FscakRG1ibDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T06:17:40.865109Z","bundle_sha256":"2ff0d83b753811e7cd8268bbde938740a25be739618baaea505e575b3516f7e3"}}