{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:ATJUTU3PCJBJLCYTE5UQAUI6II","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8006a85e8c849178a9037e534bef67a042614fae17f482ece0ae2aa78e5e8b61","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-13T17:13:33Z","title_canon_sha256":"951503db87114e0a7676bbfbfb267b68a3315dea957fd25255d184d40bb3b0de"},"schema_version":"1.0","source":{"id":"1512.04087","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.04087","created_at":"2026-05-18T01:04:57Z"},{"alias_kind":"arxiv_version","alias_value":"1512.04087v2","created_at":"2026-05-18T01:04:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.04087","created_at":"2026-05-18T01:04:57Z"},{"alias_kind":"pith_short_12","alias_value":"ATJUTU3PCJBJ","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_16","alias_value":"ATJUTU3PCJBJLCYT","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_8","alias_value":"ATJUTU3P","created_at":"2026-05-18T12:29:10Z"}],"graph_snapshots":[{"event_id":"sha256:e075a819bae0edabbb6392a726bdca53fea51dc85373c883c6d0ede2cf66efa8","target":"graph","created_at":"2026-05-18T01:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The temporal-difference methods TD($\\lambda$) and Sarsa($\\lambda$) form a core part of modern reinforcement learning. Their appeal comes from their good performance, low computational cost, and their simple interpretation, given by their forward view. Recently, new versions of these methods were introduced, called true online TD($\\lambda$) and true online Sarsa($\\lambda$), respectively (van Seijen & Sutton, 2014). These new versions maintain an exact equivalence with the forward view at all times, whereas the traditional versions only approximate it for small step-sizes. We hypothesize that th","authors_text":"A. Rupam Mahmood, Harm van Seijen, Marlos C. Machado, Patrick M. Pilarski, Richard S. Sutton","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-13T17:13:33Z","title":"True Online Temporal-Difference Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.04087","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fcfffe892ee1cb3dfe10d1169e6fdab198787861bea12739139216c57545339f","target":"record","created_at":"2026-05-18T01:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8006a85e8c849178a9037e534bef67a042614fae17f482ece0ae2aa78e5e8b61","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-13T17:13:33Z","title_canon_sha256":"951503db87114e0a7676bbfbfb267b68a3315dea957fd25255d184d40bb3b0de"},"schema_version":"1.0","source":{"id":"1512.04087","kind":"arxiv","version":2}},"canonical_sha256":"04d349d36f1242958b13276900511e421ff0f86afeb42a055d106582a2cad44e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"04d349d36f1242958b13276900511e421ff0f86afeb42a055d106582a2cad44e","first_computed_at":"2026-05-18T01:04:57.708375Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:04:57.708375Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fL6/iWGpq3JjmeI7aEKaXKUMXC/MGshnhhCDATQM7CNhtxy7Vqv9PyJMql9EblJV2iPzRngRePQ1wu+d/YreAg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:04:57.708947Z","signed_message":"canonical_sha256_bytes"},"source_id":"1512.04087","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fcfffe892ee1cb3dfe10d1169e6fdab198787861bea12739139216c57545339f","sha256:e075a819bae0edabbb6392a726bdca53fea51dc85373c883c6d0ede2cf66efa8"],"state_sha256":"0ad958691da698b13a0850e39c69d49fcea9d69d9c334151ed9ebe14a48cc67f"}