{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:LEYTE3NPACE2RSY2RLYLVC7Q7R","short_pith_number":"pith:LEYTE3NP","schema_version":"1.0","canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","source":{"kind":"arxiv","id":"1712.08642","version":1},"attestation_state":"computed","paper":{"title":"Least-Squares Temporal Difference Learning for the Linear Quadratic Regulator","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Benjamin Recht, Stephen Tu","submitted_at":"2017-12-22T20:12:07Z","abstract_excerpt":"Reinforcement learning (RL) has been successfully used to solve many continuous control tasks. Despite its impressive results however, fundamental questions regarding the sample complexity of RL on continuous problems remain open. We study the performance of RL in this setting by considering the behavior of the Least-Squares Temporal Difference (LSTD) estimator on the classic Linear Quadratic Regulator (LQR) problem from optimal control. We give the first finite-time analysis of the number of samples needed to estimate the value function for a fixed static state-feedback policy to within $\\var"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1712.08642","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-22T20:12:07Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"4610a200418183c7797823f92b17f7c88e77a27d54909efdc8a0b38bb1ae3a88","abstract_canon_sha256":"52ebecafe6689058ec8d1b00621d2b78a17b0aa0bdd9ae6cb523c50f98a8df74"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:27:17.140213Z","signature_b64":"CadwESVe86C0HqJdiJzXCxh4OZDp6TlUz0BGj+w29hdKo7z5Sjx+UovStvDFvmGp3wDP0Kudj/ygG/n5Du7VCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","last_reissued_at":"2026-05-18T00:27:17.139672Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:27:17.139672Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Least-Squares Temporal Difference Learning for the Linear Quadratic Regulator","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Benjamin Recht, Stephen Tu","submitted_at":"2017-12-22T20:12:07Z","abstract_excerpt":"Reinforcement learning (RL) has been successfully used to solve many continuous control tasks. Despite its impressive results however, fundamental questions regarding the sample complexity of RL on continuous problems remain open. We study the performance of RL in this setting by considering the behavior of the Least-Squares Temporal Difference (LSTD) estimator on the classic Linear Quadratic Regulator (LQR) problem from optimal control. We give the first finite-time analysis of the number of samples needed to estimate the value function for a fixed static state-feedback policy to within $\\var"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.08642","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1712.08642","created_at":"2026-05-18T00:27:17.139736+00:00"},{"alias_kind":"arxiv_version","alias_value":"1712.08642v1","created_at":"2026-05-18T00:27:17.139736+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.08642","created_at":"2026-05-18T00:27:17.139736+00:00"},{"alias_kind":"pith_short_12","alias_value":"LEYTE3NPACE2","created_at":"2026-05-18T12:31:28.150371+00:00"},{"alias_kind":"pith_short_16","alias_value":"LEYTE3NPACE2RSY2","created_at":"2026-05-18T12:31:28.150371+00:00"},{"alias_kind":"pith_short_8","alias_value":"LEYTE3NP","created_at":"2026-05-18T12:31:28.150371+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R","json":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R.json","graph_json":"https://pith.science/api/pith-number/LEYTE3NPACE2RSY2RLYLVC7Q7R/graph.json","events_json":"https://pith.science/api/pith-number/LEYTE3NPACE2RSY2RLYLVC7Q7R/events.json","paper":"https://pith.science/paper/LEYTE3NP"},"agent_actions":{"view_html":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R","download_json":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R.json","view_paper":"https://pith.science/paper/LEYTE3NP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1712.08642&json=true","fetch_graph":"https://pith.science/api/pith-number/LEYTE3NPACE2RSY2RLYLVC7Q7R/graph.json","fetch_events":"https://pith.science/api/pith-number/LEYTE3NPACE2RSY2RLYLVC7Q7R/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/action/storage_attestation","attest_author":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/action/author_attestation","sign_citation":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/action/citation_signature","submit_replication":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/action/replication_record"}},"created_at":"2026-05-18T00:27:17.139736+00:00","updated_at":"2026-05-18T00:27:17.139736+00:00"}