{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:KCZW2R2ANXC73JX2TIB4TGTD4T","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a5d210410102dfec4c5d8040c3ff1bee23f836fe737c9d5089d9ee01c31d9c41","cross_cats_sorted":["cs.LG","cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:45:04Z","title_canon_sha256":"2e26ee77a84622563091aeab4e6b01cd92011d95b14a1feecc6e4eb6a9031802"},"schema_version":"1.0","source":{"id":"1206.3285","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.3285","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"arxiv_version","alias_value":"1206.3285v1","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.3285","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"pith_short_12","alias_value":"KCZW2R2ANXC7","created_at":"2026-05-18T12:27:11Z"},{"alias_kind":"pith_short_16","alias_value":"KCZW2R2ANXC73JX2","created_at":"2026-05-18T12:27:11Z"},{"alias_kind":"pith_short_8","alias_value":"KCZW2R2A","created_at":"2026-05-18T12:27:11Z"}],"graph_snapshots":[{"event_id":"sha256:940b1db6dffb56740dbe7a2ea9fafca9fead9c230f02cb908a5dae8888b03095","target":"graph","created_at":"2026-05-18T03:53:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the problem of efficiently learning optimal control policies and value functions over large state spaces in an online setting in which estimates must be available after each interaction with the world. This paper develops an explicitly model-based approach extending the Dyna architecture to linear function approximation. Dynastyle planning proceeds by generating imaginary experience from the world model and then applying model-free reinforcement learning algorithms to the imagined state transitions. Our main results are to prove that linear Dyna-style planning converges to a unique","authors_text":"Alborz Geramifard, Csaba Szepesvari, Michael P. Bowling, Richard S. Sutton","cross_cats":["cs.LG","cs.SY"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:45:04Z","title":"Dyna-Style Planning with Linear Function Approximation and Prioritized Sweeping"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.3285","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0bf25bec0608184c96477de4a489df593a87adb3e5dcf6527c202350ff3a4c8f","target":"record","created_at":"2026-05-18T03:53:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a5d210410102dfec4c5d8040c3ff1bee23f836fe737c9d5089d9ee01c31d9c41","cross_cats_sorted":["cs.LG","cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:45:04Z","title_canon_sha256":"2e26ee77a84622563091aeab4e6b01cd92011d95b14a1feecc6e4eb6a9031802"},"schema_version":"1.0","source":{"id":"1206.3285","kind":"arxiv","version":1}},"canonical_sha256":"50b36d47406dc5fda6fa9a03c99a63e4e4d9427f8bdbfa2a2e9e87259c56f43a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"50b36d47406dc5fda6fa9a03c99a63e4e4d9427f8bdbfa2a2e9e87259c56f43a","first_computed_at":"2026-05-18T03:53:30.748538Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:53:30.748538Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hOwUVlSJ3b7SC1p6pBhltA2A8hX1jv4Nv04iWuBsxGriV1egJt+ia3EPwH1IB5eb6YiZCfOcPrqjTRbmhLk1CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:53:30.749082Z","signed_message":"canonical_sha256_bytes"},"source_id":"1206.3285","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0bf25bec0608184c96477de4a489df593a87adb3e5dcf6527c202350ff3a4c8f","sha256:940b1db6dffb56740dbe7a2ea9fafca9fead9c230f02cb908a5dae8888b03095"],"state_sha256":"a25a44786d0bb20e64b897901dd7b470732df6f2f66c872472237d302137cfb2"}