{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SDW4N6THC5W66Y2TWWLQYF2TN2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"414e2e63a93533d5b6f4ca1bed76f4e41b3208db136409d672cce037993c4e79","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2026-05-25T22:07:24Z","title_canon_sha256":"791248f8ab097aa9912c98a16f5c1abf3664f442e0439d0135abb22fa9c88b36"},"schema_version":"1.0","source":{"id":"2605.26361","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.26361","created_at":"2026-05-27T01:05:13Z"},{"alias_kind":"arxiv_version","alias_value":"2605.26361v1","created_at":"2026-05-27T01:05:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26361","created_at":"2026-05-27T01:05:13Z"},{"alias_kind":"pith_short_12","alias_value":"SDW4N6THC5W6","created_at":"2026-05-27T01:05:13Z"},{"alias_kind":"pith_short_16","alias_value":"SDW4N6THC5W66Y2T","created_at":"2026-05-27T01:05:13Z"},{"alias_kind":"pith_short_8","alias_value":"SDW4N6TH","created_at":"2026-05-27T01:05:13Z"}],"graph_snapshots":[{"event_id":"sha256:82346ae8c729b332b3a4d624aca83bf63ce115a5cb8846d110bcaa8b0c745ca0","target":"graph","created_at":"2026-05-27T01:05:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.26361/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Policy learning in modern operations environments faces a fundamental tension between limited operational data and the large, often continuous, state and action spaces over which good decisions must be identified and deployed. We study value-based policy learning in stochastic optimal control: a greedy policy induced by an estimate of the optimal action-value function $Q^*$ is deployed, and its performance is measured by regret. The empirical success of this approach calls for statistical insight into the structures that enable fast regret convergence. We show that, in continuous action spaces","authors_text":"Jose Blanchet, Peter Glynn, Shengbo Wang","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2026-05-25T22:07:24Z","title":"Fast Convergence of Policy Regret in Learning Stochastic Optimal Control"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26361","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e9461b988fa7fa607039d919cea9a5b1ce85fed1c9c19175cb361087c00f2156","target":"record","created_at":"2026-05-27T01:05:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"414e2e63a93533d5b6f4ca1bed76f4e41b3208db136409d672cce037993c4e79","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2026-05-25T22:07:24Z","title_canon_sha256":"791248f8ab097aa9912c98a16f5c1abf3664f442e0439d0135abb22fa9c88b36"},"schema_version":"1.0","source":{"id":"2605.26361","kind":"arxiv","version":1}},"canonical_sha256":"90edc6fa67176def6353b5970c17536e82615cb9166ab08d3f039d0ca31c065d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"90edc6fa67176def6353b5970c17536e82615cb9166ab08d3f039d0ca31c065d","first_computed_at":"2026-05-27T01:05:13.892955Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:05:13.892955Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QHul75QLVMjBz/DjhXLmU5WSIr/pCgN504LPSWEFoXFGuuTYRYQvtKg8guPKjeIGs9Pm88OVyM6OzfzZmfkBCQ==","signature_status":"signed_v1","signed_at":"2026-05-27T01:05:13.893759Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.26361","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e9461b988fa7fa607039d919cea9a5b1ce85fed1c9c19175cb361087c00f2156","sha256:82346ae8c729b332b3a4d624aca83bf63ce115a5cb8846d110bcaa8b0c745ca0"],"state_sha256":"3941fa00889194fbef008e302d84e31b2b91714d0a2d66e290ddd51b120828b0"}