{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:7BXAU7YPLUWRXTYB5P3WGRFDEV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"305751d645b8169b31a068444e82fb41a3ed378b60a9bc5ae42b6d0c994f3642","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-05-17T22:23:25Z","title_canon_sha256":"f8d624e8e6a49e2ee9564ecac766377ee3c3d9631205e613db9d7bb93bc7a742"},"schema_version":"1.0","source":{"id":"2605.17678","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17678","created_at":"2026-05-20T00:04:52Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17678v1","created_at":"2026-05-20T00:04:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17678","created_at":"2026-05-20T00:04:52Z"},{"alias_kind":"pith_short_12","alias_value":"7BXAU7YPLUWR","created_at":"2026-05-20T00:04:52Z"},{"alias_kind":"pith_short_16","alias_value":"7BXAU7YPLUWRXTYB","created_at":"2026-05-20T00:04:52Z"},{"alias_kind":"pith_short_8","alias_value":"7BXAU7YP","created_at":"2026-05-20T00:04:52Z"}],"graph_snapshots":[{"event_id":"sha256:07727e5ef2cb8d6ccf62c984677affad46804f3bd12e2bb421eb95a687320b4b","target":"graph","created_at":"2026-05-20T00:04:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We establish a Gaussian approximation bound in the convex distance with rate of order n^{-1/4}, up to polylogarithmic factors in n, for the Polyak-Ruppert averaged iterates."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The sequence of observed triples (s_k, a_k, s_{k+1}) forms a uniformly geometrically ergodic Markov chain, together with suitable regularity conditions for the projected soft Bellman equation."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Establishes n^{-1/4} Gaussian approximation in convex distance for averaged entropy-regularized Q-learning with linear function approximation and polynomial stepsizes."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Entropy-regularized Q-learning with linear function approximation yields a Gaussian approximation bound of order n to the minus one-fourth for Polyak-Ruppert averaged iterates."}],"snapshot_sha256":"0f31c0d62c8a0aaa0f1f08aafdbc3921456b4adbdcdb447dd79b4a85b5c3e0e7"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T22:31:19.443137Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T22:20:57.108737Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"cited_work_retraction","ran_at":"2026-05-19T21:51:58.976109Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"citation_quote_validity","ran_at":"2026-05-19T21:49:44.258233Z","status":"skipped","version":"0.1.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.530051Z","status":"skipped","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T21:21:57.442604Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.17678/integrity.json","findings":[],"snapshot_sha256":"1058f9d7468c8f1729374a219f75350c86f515d40bfa1409982e837fb54b5cce","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In this paper, we derive rates of convergence in the high-dimensional central limit theorem for Polyak--Ruppert averaged iterates generated by entropy-regularized asynchronous Q-learning with linear function approximation and a polynomial stepsize $k^{-\\omega}$, $\\omega \\in (1/2,1)$. Assuming that the sequence of observed triples $(s_k,a_k,s_{k+1})_{k \\geq 0}$ forms a uniformly geometrically ergodic Markov chain, and under suitable regularity conditions for the projected soft Bellman equation, we establish a Gaussian approximation bound in the convex distance with rate of order $n^{-1/4}$, up ","authors_text":"Alexey Naumov, Artemy Rubtsov, Eric Moulines, Rahul Singh, Sergey Samsonov","cross_cats":["cs.LG"],"headline":"Entropy-regularized Q-learning with linear function approximation yields a Gaussian approximation bound of order n to the minus one-fourth for Polyak-Ruppert averaged iterates.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-05-17T22:23:25Z","title":"On Gaussian approximation for entropy-regularized Q-learning with function approximation"},"references":{"count":40,"internal_anchors":3,"resolved_work":40,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Residual algorithms: Reinforcement learning with function approximation","work_id":"7740df1b-123f-43d5-b998-329a7ac59906","year":1995},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"The reverse isoperimetric problem for gaussian measure.Discrete & Computational Geometry, 10(4):411–420, 1993","work_id":"e4f5d992-92af-405f-bd44-b7a85b0b2108","year":1993},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Bertsekas and John N","work_id":"16043d2a-fad1-4af5-87e2-d17f27834c32","year":1996},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Gaussian approximation for two-timescale linear stochastic approximation","work_id":"650395df-4448-4338-9dbb-5f6b8b6b0155","year":2026},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Finite-sample analysis of nonlinear stochastic approximation with applications in reinforcement learning.Automatica, 146:110623, 2022","work_id":"fd03ab28-49e8-48e5-91ba-b2f06ab6e9d5","year":2022}],"snapshot_sha256":"0a9e7de1187c9a3d92c31e35262dcafa7453b07e3c7603b2645b25190b788e8f"},"source":{"id":"2605.17678","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T22:08:20.724234Z","id":"1fb85ef0-8a8a-4508-b400-fe18cf6333be","model_set":{"reader":"grok-4.3"},"one_line_summary":"Establishes n^{-1/4} Gaussian approximation in convex distance for averaged entropy-regularized Q-learning with linear function approximation and polynomial stepsizes.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Entropy-regularized Q-learning with linear function approximation yields a Gaussian approximation bound of order n to the minus one-fourth for Polyak-Ruppert averaged iterates.","strongest_claim":"We establish a Gaussian approximation bound in the convex distance with rate of order n^{-1/4}, up to polylogarithmic factors in n, for the Polyak-Ruppert averaged iterates.","weakest_assumption":"The sequence of observed triples (s_k, a_k, s_{k+1}) forms a uniformly geometrically ergodic Markov chain, together with suitable regularity conditions for the projected soft Bellman equation."}},"verdict_id":"1fb85ef0-8a8a-4508-b400-fe18cf6333be"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8269eae5d5a45817d229e587fa056d0c81ff1b1fe820193ba7cfe1dfb77afb03","target":"record","created_at":"2026-05-20T00:04:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"305751d645b8169b31a068444e82fb41a3ed378b60a9bc5ae42b6d0c994f3642","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-05-17T22:23:25Z","title_canon_sha256":"f8d624e8e6a49e2ee9564ecac766377ee3c3d9631205e613db9d7bb93bc7a742"},"schema_version":"1.0","source":{"id":"2605.17678","kind":"arxiv","version":1}},"canonical_sha256":"f86e0a7f0f5d2d1bcf01ebf76344a3256edac56fc99daef1829bc0016618f5a9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f86e0a7f0f5d2d1bcf01ebf76344a3256edac56fc99daef1829bc0016618f5a9","first_computed_at":"2026-05-20T00:04:52.300419Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:52.300419Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"AZj+t5HThpEGNo/ycudpYmS5W210FQrXDORlj2hTNHcae2UZ5vAyT8mK2l0EkCg9ecvl9hKjLua9A2QKz8qVBg==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:52.301246Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.17678","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8269eae5d5a45817d229e587fa056d0c81ff1b1fe820193ba7cfe1dfb77afb03","sha256:07727e5ef2cb8d6ccf62c984677affad46804f3bd12e2bb421eb95a687320b4b"],"state_sha256":"bfc859798e3c86aae844a489f5d57bc36cb9688d3ead400dc32db403dfbc53f5"}