{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:3JIBQXNXVMN5E5C7EVBEUMQJCL","short_pith_number":"pith:3JIBQXNX","canonical_record":{"source":{"id":"1806.01175","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T16:16:51Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"3b0f178932c2420dcd59b760bdd1532f5f591ee7de593b11a5ed09ca0accd6c4","abstract_canon_sha256":"1293b61ab94d63dd9da576044b0353f2ec37d7d5d4551f50e76df4491d39a8cf"},"schema_version":"1.0"},"canonical_sha256":"da50185db7ab1bd2745f25424a320912f141bd36c9c8764ae9ff3ac00ed3ea7a","source":{"kind":"arxiv","id":"1806.01175","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01175","created_at":"2026-05-18T00:14:17Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01175v1","created_at":"2026-05-18T00:14:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01175","created_at":"2026-05-18T00:14:17Z"},{"alias_kind":"pith_short_12","alias_value":"3JIBQXNXVMN5","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"3JIBQXNXVMN5E5C7","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"3JIBQXNX","created_at":"2026-05-18T12:32:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:3JIBQXNXVMN5E5C7EVBEUMQJCL","target":"record","payload":{"canonical_record":{"source":{"id":"1806.01175","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T16:16:51Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"3b0f178932c2420dcd59b760bdd1532f5f591ee7de593b11a5ed09ca0accd6c4","abstract_canon_sha256":"1293b61ab94d63dd9da576044b0353f2ec37d7d5d4551f50e76df4491d39a8cf"},"schema_version":"1.0"},"canonical_sha256":"da50185db7ab1bd2745f25424a320912f141bd36c9c8764ae9ff3ac00ed3ea7a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:14:17.207129Z","signature_b64":"KdyZb+Kj8caZjZXUfpw6Nv5FqHajBzCihXsvxZLIr7MvKVzhhNRP8FdArnJ0acRjJE0KQ5ZMIZZukFrIVaoVCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"da50185db7ab1bd2745f25424a320912f141bd36c9c8764ae9ff3ac00ed3ea7a","last_reissued_at":"2026-05-18T00:14:17.206601Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:14:17.206601Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.01175","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8mvyuMdyQ2dKZnnd9RqetccLTdi+/habKFkmTKsoUnq2q6j3OSBd0mbPPFoQSkfMVaUh5IKU18vwXyyfVfGtBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T22:16:49.999989Z"},"content_sha256":"bb35b8283b11cef95f72fffe231c7aaffbd6ef0c7f3dd6c30136fcc0a60ef9ac","schema_version":"1.0","event_id":"sha256:bb35b8283b11cef95f72fffe231c7aaffbd6ef0c7f3dd6c30136fcc0a60ef9ac"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:3JIBQXNXVMN5E5C7EVBEUMQJCL","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"TD or not TD: Analyzing the Role of Temporal Differencing in Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexey Dosovitskiy, Artemij Amiranashvili, Thomas Brox, Vladlen Koltun","submitted_at":"2018-06-04T16:16:51Z","abstract_excerpt":"Our understanding of reinforcement learning (RL) has been shaped by theoretical and empirical results that were obtained decades ago using tabular representations and linear function approximators. These results suggest that RL methods that use temporal differencing (TD) are superior to direct Monte Carlo estimation (MC). How do these results hold up in deep RL, which deals with perceptually complex environments and deep nonlinear models? In this paper, we re-examine the role of TD in modern deep RL, using specially designed environments that control for specific factors that affect performanc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01175","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qOf88Jq50IrfyxjuteJuuUDHHabtpbn0HjdIf+ULsfouRxOrcXknZgaJAI7eAmsJD/jr1+D22egDWLR7+EneBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T22:16:50.000715Z"},"content_sha256":"8e8b716353ad88c3d60f6b2479fdfd14d8fcfc575e536ecd19b240e383448d91","schema_version":"1.0","event_id":"sha256:8e8b716353ad88c3d60f6b2479fdfd14d8fcfc575e536ecd19b240e383448d91"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL/bundle.json","state_url":"https://pith.science/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T22:16:50Z","links":{"resolver":"https://pith.science/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL","bundle":"https://pith.science/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL/bundle.json","state":"https://pith.science/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3JIBQXNXVMN5E5C7EVBEUMQJCL/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:3JIBQXNXVMN5E5C7EVBEUMQJCL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1293b61ab94d63dd9da576044b0353f2ec37d7d5d4551f50e76df4491d39a8cf","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T16:16:51Z","title_canon_sha256":"3b0f178932c2420dcd59b760bdd1532f5f591ee7de593b11a5ed09ca0accd6c4"},"schema_version":"1.0","source":{"id":"1806.01175","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01175","created_at":"2026-05-18T00:14:17Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01175v1","created_at":"2026-05-18T00:14:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01175","created_at":"2026-05-18T00:14:17Z"},{"alias_kind":"pith_short_12","alias_value":"3JIBQXNXVMN5","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"3JIBQXNXVMN5E5C7","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"3JIBQXNX","created_at":"2026-05-18T12:32:02Z"}],"graph_snapshots":[{"event_id":"sha256:8e8b716353ad88c3d60f6b2479fdfd14d8fcfc575e536ecd19b240e383448d91","target":"graph","created_at":"2026-05-18T00:14:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Our understanding of reinforcement learning (RL) has been shaped by theoretical and empirical results that were obtained decades ago using tabular representations and linear function approximators. These results suggest that RL methods that use temporal differencing (TD) are superior to direct Monte Carlo estimation (MC). How do these results hold up in deep RL, which deals with perceptually complex environments and deep nonlinear models? In this paper, we re-examine the role of TD in modern deep RL, using specially designed environments that control for specific factors that affect performanc","authors_text":"Alexey Dosovitskiy, Artemij Amiranashvili, Thomas Brox, Vladlen Koltun","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T16:16:51Z","title":"TD or not TD: Analyzing the Role of Temporal Differencing in Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01175","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bb35b8283b11cef95f72fffe231c7aaffbd6ef0c7f3dd6c30136fcc0a60ef9ac","target":"record","created_at":"2026-05-18T00:14:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1293b61ab94d63dd9da576044b0353f2ec37d7d5d4551f50e76df4491d39a8cf","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T16:16:51Z","title_canon_sha256":"3b0f178932c2420dcd59b760bdd1532f5f591ee7de593b11a5ed09ca0accd6c4"},"schema_version":"1.0","source":{"id":"1806.01175","kind":"arxiv","version":1}},"canonical_sha256":"da50185db7ab1bd2745f25424a320912f141bd36c9c8764ae9ff3ac00ed3ea7a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"da50185db7ab1bd2745f25424a320912f141bd36c9c8764ae9ff3ac00ed3ea7a","first_computed_at":"2026-05-18T00:14:17.206601Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:14:17.206601Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KdyZb+Kj8caZjZXUfpw6Nv5FqHajBzCihXsvxZLIr7MvKVzhhNRP8FdArnJ0acRjJE0KQ5ZMIZZukFrIVaoVCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:14:17.207129Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.01175","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bb35b8283b11cef95f72fffe231c7aaffbd6ef0c7f3dd6c30136fcc0a60ef9ac","sha256:8e8b716353ad88c3d60f6b2479fdfd14d8fcfc575e536ecd19b240e383448d91"],"state_sha256":"a1601b5c0306d2357ba1b7d40c668512a6b4069e7363997681b6e8d503241c4a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MM2uTZRTZgrQdshzLbTq16bAlqA4dYAa8avw1LyKQ3xzuIHFDIJKvdT38tkUj0oKf5744+f5FFBkG04FZEBmAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T22:16:50.004644Z","bundle_sha256":"0a486657535841f3f5e2e58794e12f84c9e79609b2c1b72adeb86a9089bd2c85"}}