{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:TW67L2MRM7SMJXZHPZOE5X6PMR","short_pith_number":"pith:TW67L2MR","canonical_record":{"source":{"id":"2603.12109","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-03-12T16:14:14Z","cross_cats_sorted":[],"title_canon_sha256":"fb90383f593d5d89eca1b1591d0e0d4f4196691fa3ec9cca6e27874dbd3eacae","abstract_canon_sha256":"5c0f040a503d04260f379db2e3585629c0f1be69eb5c0752cf7cda9b9880082b"},"schema_version":"1.0"},"canonical_sha256":"9dbdf5e99167e4c4df277e5c4edfcf6460c2b964dc02e950e01ff0b8129ef6d3","source":{"kind":"arxiv","id":"2603.12109","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.12109","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"arxiv_version","alias_value":"2603.12109v2","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.12109","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"pith_short_12","alias_value":"TW67L2MRM7SM","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"pith_short_16","alias_value":"TW67L2MRM7SMJXZH","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"pith_short_8","alias_value":"TW67L2MR","created_at":"2026-06-02T02:04:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:TW67L2MRM7SMJXZHPZOE5X6PMR","target":"record","payload":{"canonical_record":{"source":{"id":"2603.12109","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-03-12T16:14:14Z","cross_cats_sorted":[],"title_canon_sha256":"fb90383f593d5d89eca1b1591d0e0d4f4196691fa3ec9cca6e27874dbd3eacae","abstract_canon_sha256":"5c0f040a503d04260f379db2e3585629c0f1be69eb5c0752cf7cda9b9880082b"},"schema_version":"1.0"},"canonical_sha256":"9dbdf5e99167e4c4df277e5c4edfcf6460c2b964dc02e950e01ff0b8129ef6d3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T02:04:16.032431Z","signature_b64":"KEWdstBw0PfPgB4fWrAHtPUFmLc1TmU0pNunTnhOPAYqKF2J106oLyb283nQwscvJOFa54P6YFnIWOCPBsGzBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9dbdf5e99167e4c4df277e5c4edfcf6460c2b964dc02e950e01ff0b8129ef6d3","last_reissued_at":"2026-06-02T02:04:16.031893Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T02:04:16.031893Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.12109","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T02:04:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"glV4KXgtTPbSDVPjRpchzduXp8oyYyGa4c6z+J1u0734SHI7Tnj2FbPrgosJfFAJraqWLQffYBk9r0L1pQotAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T04:19:56.950306Z"},"content_sha256":"02f43391f4f8ebf704cbbe0d3554ca4f814c2ff3721cf11c09d1e47028902e3b","schema_version":"1.0","event_id":"sha256:02f43391f4f8ebf704cbbe0d3554ca4f814c2ff3721cf11c09d1e47028902e3b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:TW67L2MRM7SMJXZHPZOE5X6PMR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"On Information Self-Locking in Reinforcement Learning for Active Reasoning of LLM agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Deyu Zou, Fan Feng, James Cheng, Mufei Li, Pan Li, Yongqiang Chen, Yu Gong","submitted_at":"2026-03-12T16:14:14Z","abstract_excerpt":"Reinforcement learning (RL) has become a de facto paradigm for building LLM-based agents that act, interact, and reason over extended task horizons. However, in active reasoning where agents must elicit new observations through interaction with the environment to solve the task, we find that outcome-based RL can induce a systematic failure mode which we call information self-locking (SeL): agents fail both to elicit informative feedback and to internalize obtained evidence. To understand the issue, we trace agentic behaviors into two coupled capabilities: Action Selection (AS), which determine"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.12109","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.12109/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T02:04:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dqR7FT/cuWeXamCh/DCjyuwYRJl0Uogq6mmSQ0dcq2SoXnDMHdQQdSiVwfHED8PM9YFyIpHGKU6UdMMu02hFAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T04:19:56.950714Z"},"content_sha256":"6b23e4fe5fc6c692c175d7c0ad54f160cfec26c6f3d4faeee74726f2b8e37395","schema_version":"1.0","event_id":"sha256:6b23e4fe5fc6c692c175d7c0ad54f160cfec26c6f3d4faeee74726f2b8e37395"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/TW67L2MRM7SMJXZHPZOE5X6PMR/bundle.json","state_url":"https://pith.science/pith/TW67L2MRM7SMJXZHPZOE5X6PMR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/TW67L2MRM7SMJXZHPZOE5X6PMR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T04:19:56Z","links":{"resolver":"https://pith.science/pith/TW67L2MRM7SMJXZHPZOE5X6PMR","bundle":"https://pith.science/pith/TW67L2MRM7SMJXZHPZOE5X6PMR/bundle.json","state":"https://pith.science/pith/TW67L2MRM7SMJXZHPZOE5X6PMR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/TW67L2MRM7SMJXZHPZOE5X6PMR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TW67L2MRM7SMJXZHPZOE5X6PMR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5c0f040a503d04260f379db2e3585629c0f1be69eb5c0752cf7cda9b9880082b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-03-12T16:14:14Z","title_canon_sha256":"fb90383f593d5d89eca1b1591d0e0d4f4196691fa3ec9cca6e27874dbd3eacae"},"schema_version":"1.0","source":{"id":"2603.12109","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.12109","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"arxiv_version","alias_value":"2603.12109v2","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.12109","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"pith_short_12","alias_value":"TW67L2MRM7SM","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"pith_short_16","alias_value":"TW67L2MRM7SMJXZH","created_at":"2026-06-02T02:04:16Z"},{"alias_kind":"pith_short_8","alias_value":"TW67L2MR","created_at":"2026-06-02T02:04:16Z"}],"graph_snapshots":[{"event_id":"sha256:6b23e4fe5fc6c692c175d7c0ad54f160cfec26c6f3d4faeee74726f2b8e37395","target":"graph","created_at":"2026-06-02T02:04:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.12109/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has become a de facto paradigm for building LLM-based agents that act, interact, and reason over extended task horizons. However, in active reasoning where agents must elicit new observations through interaction with the environment to solve the task, we find that outcome-based RL can induce a systematic failure mode which we call information self-locking (SeL): agents fail both to elicit informative feedback and to internalize obtained evidence. To understand the issue, we trace agentic behaviors into two coupled capabilities: Action Selection (AS), which determine","authors_text":"Deyu Zou, Fan Feng, James Cheng, Mufei Li, Pan Li, Yongqiang Chen, Yu Gong","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-03-12T16:14:14Z","title":"On Information Self-Locking in Reinforcement Learning for Active Reasoning of LLM agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.12109","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:02f43391f4f8ebf704cbbe0d3554ca4f814c2ff3721cf11c09d1e47028902e3b","target":"record","created_at":"2026-06-02T02:04:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5c0f040a503d04260f379db2e3585629c0f1be69eb5c0752cf7cda9b9880082b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-03-12T16:14:14Z","title_canon_sha256":"fb90383f593d5d89eca1b1591d0e0d4f4196691fa3ec9cca6e27874dbd3eacae"},"schema_version":"1.0","source":{"id":"2603.12109","kind":"arxiv","version":2}},"canonical_sha256":"9dbdf5e99167e4c4df277e5c4edfcf6460c2b964dc02e950e01ff0b8129ef6d3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9dbdf5e99167e4c4df277e5c4edfcf6460c2b964dc02e950e01ff0b8129ef6d3","first_computed_at":"2026-06-02T02:04:16.031893Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T02:04:16.031893Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KEWdstBw0PfPgB4fWrAHtPUFmLc1TmU0pNunTnhOPAYqKF2J106oLyb283nQwscvJOFa54P6YFnIWOCPBsGzBA==","signature_status":"signed_v1","signed_at":"2026-06-02T02:04:16.032431Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.12109","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:02f43391f4f8ebf704cbbe0d3554ca4f814c2ff3721cf11c09d1e47028902e3b","sha256:6b23e4fe5fc6c692c175d7c0ad54f160cfec26c6f3d4faeee74726f2b8e37395"],"state_sha256":"d778b846de8d26ec26266333694a58db809fe6284879bd6858280b89ea34b9c5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zTFkjOgxO62bWocfn7fJ2KujWSdqTzk40lmdrACfAVp12kqkMafpfuMfxihkxGLvuLSmfpUE0+WdMh/farxBCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T04:19:56.953079Z","bundle_sha256":"8d1ed6930c983ef5a8b09527980ddc9310b12f3a3fea8893875e9c517cb91f5c"}}