{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:KH3JMQ5LKPD7IU2645ZASETSVE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"700fd5505fb1825b7f6c60be449d554fd63a66df753c2aa8560e5fe941785edb","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-28T16:14:45Z","title_canon_sha256":"4ad11462a8d826374e56ad7c6850b30218a158526e86ee1e00d447947deaf924"},"schema_version":"1.0","source":{"id":"2605.30154","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30154","created_at":"2026-05-29T02:06:11Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30154v1","created_at":"2026-05-29T02:06:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30154","created_at":"2026-05-29T02:06:11Z"},{"alias_kind":"pith_short_12","alias_value":"KH3JMQ5LKPD7","created_at":"2026-05-29T02:06:11Z"},{"alias_kind":"pith_short_16","alias_value":"KH3JMQ5LKPD7IU26","created_at":"2026-05-29T02:06:11Z"},{"alias_kind":"pith_short_8","alias_value":"KH3JMQ5L","created_at":"2026-05-29T02:06:11Z"}],"graph_snapshots":[{"event_id":"sha256:2fba3987f126884700e4d60622b6cf7949eb9b4c5ddee16176eb0e34eb7e9b00","target":"graph","created_at":"2026-05-29T02:06:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30154/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Correctness-based Reinforcement Learning with Verifiable Rewards (RLVR) trains language models from binary feedback on sampled outputs, but the objective optimized in expectation and the stochastic update geometry induced by finite rollout groups are often conflated. This paper develops RL2ML, a family of finite-rollout surrogate objectives with a closed-form, exactly unbiased gradient estimator. The family continuously connects standard reinforcement learning, maximum-likelihood-like training, and beyond-maximum-likelihood objectives while preserving estimator-objective alignment under a fixe","authors_text":"Yifu Zheng","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-28T16:14:45Z","title":"RL2ML: Finite-Rollout Surrogate Objectives from Reinforcement Learning to Maximum Likelihood"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30154","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ec5813b258d9cd693ce1c9bbf67b5e4d026863443c40d982b13df311d56c0dba","target":"record","created_at":"2026-05-29T02:06:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"700fd5505fb1825b7f6c60be449d554fd63a66df753c2aa8560e5fe941785edb","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-28T16:14:45Z","title_canon_sha256":"4ad11462a8d826374e56ad7c6850b30218a158526e86ee1e00d447947deaf924"},"schema_version":"1.0","source":{"id":"2605.30154","kind":"arxiv","version":1}},"canonical_sha256":"51f69643ab53c7f4535ee772091272a93e21c04ae7854e5efa84e4d3c70bca12","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"51f69643ab53c7f4535ee772091272a93e21c04ae7854e5efa84e4d3c70bca12","first_computed_at":"2026-05-29T02:06:11.238323Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T02:06:11.238323Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hpXTuEkJyZGLtO8G2Oaj+IngVsNHlsjlbyw4mHwrT30McposGi7tmr7u2CuRwnNtVyiySZ98aVbrOtyqYt1eDQ==","signature_status":"signed_v1","signed_at":"2026-05-29T02:06:11.238726Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30154","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ec5813b258d9cd693ce1c9bbf67b5e4d026863443c40d982b13df311d56c0dba","sha256:2fba3987f126884700e4d60622b6cf7949eb9b4c5ddee16176eb0e34eb7e9b00"],"state_sha256":"83e90178482c203f008b13d0d9e2b4302075a81431939eeae30292e301a0d180"}