{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:FU2U6JQM7GS5LZBMZK7CWSUBDZ","short_pith_number":"pith:FU2U6JQM","schema_version":"1.0","canonical_sha256":"2d354f260cf9a5d5e42ccabe2b4a811e5441fa2f7578f110ae71c30a315b296c","source":{"kind":"arxiv","id":"1410.2954","version":1},"attestation_state":"computed","paper":{"title":"Q-learning for Optimal Control of Continuous-time Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.SY","authors_text":"Biao Luo, Derong Liu, Tingwen Huang","submitted_at":"2014-10-11T06:06:20Z","abstract_excerpt":"In this paper, two Q-learning (QL) methods are proposed and their convergence theories are established for addressing the model-free optimal control problem of general nonlinear continuous-time systems. By introducing the Q-function for continuous-time systems, policy iteration based QL (PIQL) and value iteration based QL (VIQL) algorithms are proposed for learning the optimal control policy from real system data rather than using mathematical system model. It is proved that both PIQL and VIQL methods generate a nonincreasing Q-function sequence, which converges to the optimal Q-function. For "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1410.2954","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SY","submitted_at":"2014-10-11T06:06:20Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"5d1433bb80c6b9c9faf7b6647b2e72b2597b12162d1c71dc23b9540b895778a2","abstract_canon_sha256":"2db34d9434daa8e24509f43b5786c1f8bef335661e115fa3a225582d65e6a30a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:40:17.395805Z","signature_b64":"NzZvaFguF9u945hcNqVgJDjW1auLDPZTw0zngDC4GWXRGSPIByUkxPkxckSGOiOrresCZTlAwgaE3e1GSgDeBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2d354f260cf9a5d5e42ccabe2b4a811e5441fa2f7578f110ae71c30a315b296c","last_reissued_at":"2026-05-18T02:40:17.395366Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:40:17.395366Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Q-learning for Optimal Control of Continuous-time Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.SY","authors_text":"Biao Luo, Derong Liu, Tingwen Huang","submitted_at":"2014-10-11T06:06:20Z","abstract_excerpt":"In this paper, two Q-learning (QL) methods are proposed and their convergence theories are established for addressing the model-free optimal control problem of general nonlinear continuous-time systems. By introducing the Q-function for continuous-time systems, policy iteration based QL (PIQL) and value iteration based QL (VIQL) algorithms are proposed for learning the optimal control policy from real system data rather than using mathematical system model. It is proved that both PIQL and VIQL methods generate a nonincreasing Q-function sequence, which converges to the optimal Q-function. For "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1410.2954","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1410.2954","created_at":"2026-05-18T02:40:17.395435+00:00"},{"alias_kind":"arxiv_version","alias_value":"1410.2954v1","created_at":"2026-05-18T02:40:17.395435+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1410.2954","created_at":"2026-05-18T02:40:17.395435+00:00"},{"alias_kind":"pith_short_12","alias_value":"FU2U6JQM7GS5","created_at":"2026-05-18T12:28:28.263976+00:00"},{"alias_kind":"pith_short_16","alias_value":"FU2U6JQM7GS5LZBM","created_at":"2026-05-18T12:28:28.263976+00:00"},{"alias_kind":"pith_short_8","alias_value":"FU2U6JQM","created_at":"2026-05-18T12:28:28.263976+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ","json":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ.json","graph_json":"https://pith.science/api/pith-number/FU2U6JQM7GS5LZBMZK7CWSUBDZ/graph.json","events_json":"https://pith.science/api/pith-number/FU2U6JQM7GS5LZBMZK7CWSUBDZ/events.json","paper":"https://pith.science/paper/FU2U6JQM"},"agent_actions":{"view_html":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ","download_json":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ.json","view_paper":"https://pith.science/paper/FU2U6JQM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1410.2954&json=true","fetch_graph":"https://pith.science/api/pith-number/FU2U6JQM7GS5LZBMZK7CWSUBDZ/graph.json","fetch_events":"https://pith.science/api/pith-number/FU2U6JQM7GS5LZBMZK7CWSUBDZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ/action/storage_attestation","attest_author":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ/action/author_attestation","sign_citation":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ/action/citation_signature","submit_replication":"https://pith.science/pith/FU2U6JQM7GS5LZBMZK7CWSUBDZ/action/replication_record"}},"created_at":"2026-05-18T02:40:17.395435+00:00","updated_at":"2026-05-18T02:40:17.395435+00:00"}