{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:DOCLTGMLIFL2Y5LX7PZZVCSHCY","short_pith_number":"pith:DOCLTGML","schema_version":"1.0","canonical_sha256":"1b84b9998b4157ac7577fbf39a8a47162c2bdf135d5a98780dffcd3fb538c089","source":{"kind":"arxiv","id":"2409.03915","version":3},"attestation_state":"computed","paper":{"title":"Asynchronous Stochastic Approximation with Applications to Average-Reward Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC"],"primary_cat":"cs.LG","authors_text":"Huizhen Yu, Richard S. Sutton, Yi Wan","submitted_at":"2024-09-05T21:23:51Z","abstract_excerpt":"This paper investigates the stability and convergence properties of asynchronous stochastic approximation (SA) algorithms, with a focus on extensions relevant to average-reward reinforcement learning. We first extend a stability proof method of Borkar and Meyn to accommodate more general noise conditions than previously considered, thereby yielding broader convergence guarantees for asynchronous SA. To sharpen the convergence analysis, we further examine the shadowing properties of asynchronous SA, building on a dynamical systems approach of Hirsch and Bena\\\"{i}m. These results provide a theor"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2409.03915","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2024-09-05T21:23:51Z","cross_cats_sorted":["math.OC"],"title_canon_sha256":"e61ab771f677e6874f62f2903a9094db3eeb8b327f9c1dc93efe66ff7328dee1","abstract_canon_sha256":"dfd8f38b7d3d0efe2194b166e65e7a9f63ea21cdc55d547b0e563a0bd52ad591"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:29.218353Z","signature_b64":"4QPdzG/prqdP+deUuLnAbLSxtUhQY6i6rbn4GZp/iWYIsezWZ3QMWj0/qhCKbDAE/EeV80winG6h3FAo5FiTBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1b84b9998b4157ac7577fbf39a8a47162c2bdf135d5a98780dffcd3fb538c089","last_reissued_at":"2026-06-02T01:03:29.217931Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:29.217931Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Asynchronous Stochastic Approximation with Applications to Average-Reward Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC"],"primary_cat":"cs.LG","authors_text":"Huizhen Yu, Richard S. Sutton, Yi Wan","submitted_at":"2024-09-05T21:23:51Z","abstract_excerpt":"This paper investigates the stability and convergence properties of asynchronous stochastic approximation (SA) algorithms, with a focus on extensions relevant to average-reward reinforcement learning. We first extend a stability proof method of Borkar and Meyn to accommodate more general noise conditions than previously considered, thereby yielding broader convergence guarantees for asynchronous SA. To sharpen the convergence analysis, we further examine the shadowing properties of asynchronous SA, building on a dynamical systems approach of Hirsch and Bena\\\"{i}m. These results provide a theor"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.03915","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2409.03915/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2409.03915","created_at":"2026-06-02T01:03:29.217987+00:00"},{"alias_kind":"arxiv_version","alias_value":"2409.03915v3","created_at":"2026-06-02T01:03:29.217987+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.03915","created_at":"2026-06-02T01:03:29.217987+00:00"},{"alias_kind":"pith_short_12","alias_value":"DOCLTGMLIFL2","created_at":"2026-06-02T01:03:29.217987+00:00"},{"alias_kind":"pith_short_16","alias_value":"DOCLTGMLIFL2Y5LX","created_at":"2026-06-02T01:03:29.217987+00:00"},{"alias_kind":"pith_short_8","alias_value":"DOCLTGML","created_at":"2026-06-02T01:03:29.217987+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2504.18743","citing_title":"From Set Convergence to Pointwise Convergence: Finite-Time Guarantees for Average-Reward Q-Learning with Adaptive Stepsizes","ref_index":83,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY","json":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY.json","graph_json":"https://pith.science/api/pith-number/DOCLTGMLIFL2Y5LX7PZZVCSHCY/graph.json","events_json":"https://pith.science/api/pith-number/DOCLTGMLIFL2Y5LX7PZZVCSHCY/events.json","paper":"https://pith.science/paper/DOCLTGML"},"agent_actions":{"view_html":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY","download_json":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY.json","view_paper":"https://pith.science/paper/DOCLTGML","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2409.03915&json=true","fetch_graph":"https://pith.science/api/pith-number/DOCLTGMLIFL2Y5LX7PZZVCSHCY/graph.json","fetch_events":"https://pith.science/api/pith-number/DOCLTGMLIFL2Y5LX7PZZVCSHCY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY/action/storage_attestation","attest_author":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY/action/author_attestation","sign_citation":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY/action/citation_signature","submit_replication":"https://pith.science/pith/DOCLTGMLIFL2Y5LX7PZZVCSHCY/action/replication_record"}},"created_at":"2026-06-02T01:03:29.217987+00:00","updated_at":"2026-06-02T01:03:29.217987+00:00"}