{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:LEYTE3NPACE2RSY2RLYLVC7Q7R","short_pith_number":"pith:LEYTE3NP","canonical_record":{"source":{"id":"1712.08642","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-22T20:12:07Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"4610a200418183c7797823f92b17f7c88e77a27d54909efdc8a0b38bb1ae3a88","abstract_canon_sha256":"52ebecafe6689058ec8d1b00621d2b78a17b0aa0bdd9ae6cb523c50f98a8df74"},"schema_version":"1.0"},"canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","source":{"kind":"arxiv","id":"1712.08642","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.08642","created_at":"2026-05-18T00:27:17Z"},{"alias_kind":"arxiv_version","alias_value":"1712.08642v1","created_at":"2026-05-18T00:27:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.08642","created_at":"2026-05-18T00:27:17Z"},{"alias_kind":"pith_short_12","alias_value":"LEYTE3NPACE2","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_16","alias_value":"LEYTE3NPACE2RSY2","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_8","alias_value":"LEYTE3NP","created_at":"2026-05-18T12:31:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:LEYTE3NPACE2RSY2RLYLVC7Q7R","target":"record","payload":{"canonical_record":{"source":{"id":"1712.08642","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-22T20:12:07Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"4610a200418183c7797823f92b17f7c88e77a27d54909efdc8a0b38bb1ae3a88","abstract_canon_sha256":"52ebecafe6689058ec8d1b00621d2b78a17b0aa0bdd9ae6cb523c50f98a8df74"},"schema_version":"1.0"},"canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:27:17.140213Z","signature_b64":"CadwESVe86C0HqJdiJzXCxh4OZDp6TlUz0BGj+w29hdKo7z5Sjx+UovStvDFvmGp3wDP0Kudj/ygG/n5Du7VCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","last_reissued_at":"2026-05-18T00:27:17.139672Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:27:17.139672Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.08642","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:27:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"77DW3J0PTMTwMGPgPcFbgLUAL44+/64wpeYTcOBgKJz0ohMeoltMYOjzhGVE34ZwW4nQ/8pw/XkaRe5VmYl7AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T14:53:27.059448Z"},"content_sha256":"7a53e157e858225296ffc5d9f3cd5c7f706033494b8c08cf248e95f1e0fbc0ba","schema_version":"1.0","event_id":"sha256:7a53e157e858225296ffc5d9f3cd5c7f706033494b8c08cf248e95f1e0fbc0ba"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:LEYTE3NPACE2RSY2RLYLVC7Q7R","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Least-Squares Temporal Difference Learning for the Linear Quadratic Regulator","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Benjamin Recht, Stephen Tu","submitted_at":"2017-12-22T20:12:07Z","abstract_excerpt":"Reinforcement learning (RL) has been successfully used to solve many continuous control tasks. Despite its impressive results however, fundamental questions regarding the sample complexity of RL on continuous problems remain open. We study the performance of RL in this setting by considering the behavior of the Least-Squares Temporal Difference (LSTD) estimator on the classic Linear Quadratic Regulator (LQR) problem from optimal control. We give the first finite-time analysis of the number of samples needed to estimate the value function for a fixed static state-feedback policy to within $\\var"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.08642","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:27:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8EJjMs+T0nR2Xny2bSVRZ723HLg1zaqTN4G0d5zlPDxXSSAIOarS9D6hBHweNvTd0iQBzSv7PwR9v0hHpQ4rCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T14:53:27.059791Z"},"content_sha256":"e04c35cb6ab54ff32c9ef5074f3b341ebace23aa9b87b4eea91ce991fe06de8b","schema_version":"1.0","event_id":"sha256:e04c35cb6ab54ff32c9ef5074f3b341ebace23aa9b87b4eea91ce991fe06de8b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/bundle.json","state_url":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T14:53:27Z","links":{"resolver":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R","bundle":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/bundle.json","state":"https://pith.science/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LEYTE3NPACE2RSY2RLYLVC7Q7R/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:LEYTE3NPACE2RSY2RLYLVC7Q7R","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"52ebecafe6689058ec8d1b00621d2b78a17b0aa0bdd9ae6cb523c50f98a8df74","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-22T20:12:07Z","title_canon_sha256":"4610a200418183c7797823f92b17f7c88e77a27d54909efdc8a0b38bb1ae3a88"},"schema_version":"1.0","source":{"id":"1712.08642","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.08642","created_at":"2026-05-18T00:27:17Z"},{"alias_kind":"arxiv_version","alias_value":"1712.08642v1","created_at":"2026-05-18T00:27:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.08642","created_at":"2026-05-18T00:27:17Z"},{"alias_kind":"pith_short_12","alias_value":"LEYTE3NPACE2","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_16","alias_value":"LEYTE3NPACE2RSY2","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_8","alias_value":"LEYTE3NP","created_at":"2026-05-18T12:31:28Z"}],"graph_snapshots":[{"event_id":"sha256:e04c35cb6ab54ff32c9ef5074f3b341ebace23aa9b87b4eea91ce991fe06de8b","target":"graph","created_at":"2026-05-18T00:27:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has been successfully used to solve many continuous control tasks. Despite its impressive results however, fundamental questions regarding the sample complexity of RL on continuous problems remain open. We study the performance of RL in this setting by considering the behavior of the Least-Squares Temporal Difference (LSTD) estimator on the classic Linear Quadratic Regulator (LQR) problem from optimal control. We give the first finite-time analysis of the number of samples needed to estimate the value function for a fixed static state-feedback policy to within $\\var","authors_text":"Benjamin Recht, Stephen Tu","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-22T20:12:07Z","title":"Least-Squares Temporal Difference Learning for the Linear Quadratic Regulator"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.08642","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7a53e157e858225296ffc5d9f3cd5c7f706033494b8c08cf248e95f1e0fbc0ba","target":"record","created_at":"2026-05-18T00:27:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"52ebecafe6689058ec8d1b00621d2b78a17b0aa0bdd9ae6cb523c50f98a8df74","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-22T20:12:07Z","title_canon_sha256":"4610a200418183c7797823f92b17f7c88e77a27d54909efdc8a0b38bb1ae3a88"},"schema_version":"1.0","source":{"id":"1712.08642","kind":"arxiv","version":1}},"canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5931326daf0089a8cb1a8af0ba8bf0fc676fa1b52c906ec26661b333a61ab846","first_computed_at":"2026-05-18T00:27:17.139672Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:27:17.139672Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"CadwESVe86C0HqJdiJzXCxh4OZDp6TlUz0BGj+w29hdKo7z5Sjx+UovStvDFvmGp3wDP0Kudj/ygG/n5Du7VCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:27:17.140213Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.08642","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7a53e157e858225296ffc5d9f3cd5c7f706033494b8c08cf248e95f1e0fbc0ba","sha256:e04c35cb6ab54ff32c9ef5074f3b341ebace23aa9b87b4eea91ce991fe06de8b"],"state_sha256":"60cc5e63676f1cdc9d2adb2976b89a5c13a640d2ca834687088b0e7242affe3a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zCkcfmrW5Jh27ASIVEUo34XK0wHLw/IS4GqmEclI3pomRvyO4JdbE+EN2C5gALAEaVtCymOxHks0lmjl3N6tAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T14:53:27.061877Z","bundle_sha256":"f4f9d6861f9d40a3524ad85d273f933f3bcd38fb952234fd427aeed6773cb7e4"}}