{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DOSNAH2KJZXJBKGX6TS3WJ6X4R","short_pith_number":"pith:DOSNAH2K","canonical_record":{"source":{"id":"2601.03715","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-07T09:04:52Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d7e7f8482590129b063dce375c7ff06812ec4bec412e8c28c1af2d51541584b4","abstract_canon_sha256":"11c7fcca884f65b38d34110d1f0490faaf8c24909cb5e7f88adc83850e351b33"},"schema_version":"1.0"},"canonical_sha256":"1ba4d01f4a4e6e90a8d7f4e5bb27d7e4655a5e5c0417c5b22d6149395f4c53b9","source":{"kind":"arxiv","id":"2601.03715","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.03715","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"arxiv_version","alias_value":"2601.03715v2","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.03715","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"pith_short_12","alias_value":"DOSNAH2KJZXJ","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"pith_short_16","alias_value":"DOSNAH2KJZXJBKGX","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"pith_short_8","alias_value":"DOSNAH2K","created_at":"2026-05-25T02:02:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DOSNAH2KJZXJBKGX6TS3WJ6X4R","target":"record","payload":{"canonical_record":{"source":{"id":"2601.03715","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-07T09:04:52Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d7e7f8482590129b063dce375c7ff06812ec4bec412e8c28c1af2d51541584b4","abstract_canon_sha256":"11c7fcca884f65b38d34110d1f0490faaf8c24909cb5e7f88adc83850e351b33"},"schema_version":"1.0"},"canonical_sha256":"1ba4d01f4a4e6e90a8d7f4e5bb27d7e4655a5e5c0417c5b22d6149395f4c53b9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:02:12.107531Z","signature_b64":"n8atlA2PZ5y+d/Qb09tQpaLEbzGkAjWimGUp/sEP4vnlayB5d/EZrtdfZBHWrThdldLOFua7HOerZrp+PBPjCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1ba4d01f4a4e6e90a8d7f4e5bb27d7e4655a5e5c0417c5b22d6149395f4c53b9","last_reissued_at":"2026-05-25T02:02:12.106626Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:02:12.106626Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2601.03715","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:02:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AwBBCq+2aLhemL1IsiDqyIBEAcIJ3xJsWhMme6VGX4qehg2E89qRNoT+OwGLuTKe3wU8Xp8/9auT33TmmSTGBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T17:41:00.040646Z"},"content_sha256":"5d1864e7f0d74e97569fe2c32a5dafa1d8635b729f5eb1cdf083c29637814e8e","schema_version":"1.0","event_id":"sha256:5d1864e7f0d74e97569fe2c32a5dafa1d8635b729f5eb1cdf083c29637814e8e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DOSNAH2KJZXJBKGX6TS3WJ6X4R","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"R$^3$L: Reflect-then-Retry Reinforcement Learning with Language-Guided Exploration, Pivotal Credit, and Positive Amplification","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jiajie Xu, Weijie Shi, Xiaofang Zhou, Xuchen Pan, Yaliang Li, Yanxi Chen, Yuchang Sun, Zexi Li","submitted_at":"2026-01-07T09:04:52Z","abstract_excerpt":"Reinforcement learning drives recent advances in LLM reasoning and agentic capabilities, yet current approaches struggle with both exploration and exploitation. Exploration suffers from low success rates on difficult tasks and high costs of repeated rollouts from scratch. Exploitation suffers from coarse credit assignment and training instability: Trajectory-level rewards penalize valid prefixes for later errors, and failure-dominated groups overwhelm the few positive signals, leaving optimization without constructive direction. To this end, we propose R$^3$L, Reflect-then-Retry Reinforcement "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.03715","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.03715/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:02:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2Yw2bheK4VIy2ECPCaQVgbJj5oHpV5Fe3/vwj95V80TudZOSuUtfhjCgZJsXATsB7sO21cAmMVs89NK+CqXQDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T17:41:00.041429Z"},"content_sha256":"bb26fe76f30909b60a2d38de79c04c25f8dd40c951d85b3343ee7c75c13120ab","schema_version":"1.0","event_id":"sha256:bb26fe76f30909b60a2d38de79c04c25f8dd40c951d85b3343ee7c75c13120ab"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R/bundle.json","state_url":"https://pith.science/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T17:41:00Z","links":{"resolver":"https://pith.science/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R","bundle":"https://pith.science/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R/bundle.json","state":"https://pith.science/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DOSNAH2KJZXJBKGX6TS3WJ6X4R/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DOSNAH2KJZXJBKGX6TS3WJ6X4R","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"11c7fcca884f65b38d34110d1f0490faaf8c24909cb5e7f88adc83850e351b33","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-07T09:04:52Z","title_canon_sha256":"d7e7f8482590129b063dce375c7ff06812ec4bec412e8c28c1af2d51541584b4"},"schema_version":"1.0","source":{"id":"2601.03715","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.03715","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"arxiv_version","alias_value":"2601.03715v2","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.03715","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"pith_short_12","alias_value":"DOSNAH2KJZXJ","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"pith_short_16","alias_value":"DOSNAH2KJZXJBKGX","created_at":"2026-05-25T02:02:12Z"},{"alias_kind":"pith_short_8","alias_value":"DOSNAH2K","created_at":"2026-05-25T02:02:12Z"}],"graph_snapshots":[{"event_id":"sha256:bb26fe76f30909b60a2d38de79c04c25f8dd40c951d85b3343ee7c75c13120ab","target":"graph","created_at":"2026-05-25T02:02:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2601.03715/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning drives recent advances in LLM reasoning and agentic capabilities, yet current approaches struggle with both exploration and exploitation. Exploration suffers from low success rates on difficult tasks and high costs of repeated rollouts from scratch. Exploitation suffers from coarse credit assignment and training instability: Trajectory-level rewards penalize valid prefixes for later errors, and failure-dominated groups overwhelm the few positive signals, leaving optimization without constructive direction. To this end, we propose R$^3$L, Reflect-then-Retry Reinforcement ","authors_text":"Jiajie Xu, Weijie Shi, Xiaofang Zhou, Xuchen Pan, Yaliang Li, Yanxi Chen, Yuchang Sun, Zexi Li","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-07T09:04:52Z","title":"R$^3$L: Reflect-then-Retry Reinforcement Learning with Language-Guided Exploration, Pivotal Credit, and Positive Amplification"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.03715","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5d1864e7f0d74e97569fe2c32a5dafa1d8635b729f5eb1cdf083c29637814e8e","target":"record","created_at":"2026-05-25T02:02:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"11c7fcca884f65b38d34110d1f0490faaf8c24909cb5e7f88adc83850e351b33","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-07T09:04:52Z","title_canon_sha256":"d7e7f8482590129b063dce375c7ff06812ec4bec412e8c28c1af2d51541584b4"},"schema_version":"1.0","source":{"id":"2601.03715","kind":"arxiv","version":2}},"canonical_sha256":"1ba4d01f4a4e6e90a8d7f4e5bb27d7e4655a5e5c0417c5b22d6149395f4c53b9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1ba4d01f4a4e6e90a8d7f4e5bb27d7e4655a5e5c0417c5b22d6149395f4c53b9","first_computed_at":"2026-05-25T02:02:12.106626Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:02:12.106626Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"n8atlA2PZ5y+d/Qb09tQpaLEbzGkAjWimGUp/sEP4vnlayB5d/EZrtdfZBHWrThdldLOFua7HOerZrp+PBPjCg==","signature_status":"signed_v1","signed_at":"2026-05-25T02:02:12.107531Z","signed_message":"canonical_sha256_bytes"},"source_id":"2601.03715","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5d1864e7f0d74e97569fe2c32a5dafa1d8635b729f5eb1cdf083c29637814e8e","sha256:bb26fe76f30909b60a2d38de79c04c25f8dd40c951d85b3343ee7c75c13120ab"],"state_sha256":"dc9c0ddd2e7a50b0d56a13fb831d0d330bba9820e23a9d008e867097bb970ca4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7BHkk2al4UKih7DkW2cfjUstQ5y1gumld4qr27IAP79v4gR5qPRpMwUVYx8j+eUbR2pEuc0xcd+bfcndGQn4CQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T17:41:00.045432Z","bundle_sha256":"6e88a3532868f74b031d9d091d193b43fdb65bc60554660c24f226a5b967bb36"}}