{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:LZMIL3YMVZOKVQNP32E37KWL5I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6952705cbdf7f7b2f4619f310809e0c30852c82e907a5ec54e780f6e3a30fdd8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T10:20:40Z","title_canon_sha256":"aac4ac77ed365cb6dc56286db539377c8d4e1f42c9894b9d2fd5ca496735e99a"},"schema_version":"1.0","source":{"id":"2605.28276","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.28276","created_at":"2026-05-28T01:05:04Z"},{"alias_kind":"arxiv_version","alias_value":"2605.28276v1","created_at":"2026-05-28T01:05:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.28276","created_at":"2026-05-28T01:05:04Z"},{"alias_kind":"pith_short_12","alias_value":"LZMIL3YMVZOK","created_at":"2026-05-28T01:05:04Z"},{"alias_kind":"pith_short_16","alias_value":"LZMIL3YMVZOKVQNP","created_at":"2026-05-28T01:05:04Z"},{"alias_kind":"pith_short_8","alias_value":"LZMIL3YM","created_at":"2026-05-28T01:05:04Z"}],"graph_snapshots":[{"event_id":"sha256:d3d2b7c9afc7bbccd6d90de0ab251bd0bbc502bb9b9fbede65318efb5f908002","target":"graph","created_at":"2026-05-28T01:05:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.28276/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning algorithms are commonly analyzed (and designed) under the Markov assumption. This is unrealistic, as most environments encountered in practice are either partially observable, or require function approximation that restricts the agent to access non-Markovian state features. We consider the problem of learning an optimal reactive policy in a finite environment with deterministic observations (or equivalently, hard state aggregation). We introduce a new algorithm, Committed Q-learning, and prove almost-sure convergence to the optimal reactive policy under an intuitive assu","authors_text":"Claire Vernade, Michael Muehlebach, Onno Eberhard","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T10:20:40Z","title":"Commit to the Bit: Reactive Reinforcement Learning Done Right"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.28276","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8138dc78aa1a3e72a35c73cec92a06aba3af26ba17838541eadc9e9500793d57","target":"record","created_at":"2026-05-28T01:05:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6952705cbdf7f7b2f4619f310809e0c30852c82e907a5ec54e780f6e3a30fdd8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T10:20:40Z","title_canon_sha256":"aac4ac77ed365cb6dc56286db539377c8d4e1f42c9894b9d2fd5ca496735e99a"},"schema_version":"1.0","source":{"id":"2605.28276","kind":"arxiv","version":1}},"canonical_sha256":"5e5885ef0cae5caac1afde89bfaacbea3b3752ca6a6aa45e5b7f4e2554c6d4fc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5e5885ef0cae5caac1afde89bfaacbea3b3752ca6a6aa45e5b7f4e2554c6d4fc","first_computed_at":"2026-05-28T01:05:04.902608Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:05:04.902608Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IXW+AmnyEFn5/Kx7PpJRoHcoSzdTdinxv/UeRs7+J7Zs7nLy/3Jq/reiRz/D9CcgkAq+ns9qBF+rgOGyVmUCAg==","signature_status":"signed_v1","signed_at":"2026-05-28T01:05:04.903019Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.28276","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8138dc78aa1a3e72a35c73cec92a06aba3af26ba17838541eadc9e9500793d57","sha256:d3d2b7c9afc7bbccd6d90de0ab251bd0bbc502bb9b9fbede65318efb5f908002"],"state_sha256":"d9a1731f6680ce25f81284aec613611ad474f99ebd75e9eb501d47c5c75d8e6c"}