{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PBCCOPAWRYZSAK5LKT5OF6ADIK","short_pith_number":"pith:PBCCOPAW","canonical_record":{"source":{"id":"2603.07833","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-08T22:32:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"58cb096c3b848f4b6416f53316d28dfb6a73f1bbe740f237ca51e916c82e9bc5","abstract_canon_sha256":"0ca02a3b61d2a42b6a813a19748660ebb62740cc78a6b57920cdd07b02ac0ab8"},"schema_version":"1.0"},"canonical_sha256":"7844273c168e33202bab54fae2f80342b2a19f4e770f4ac12d3443bd35f28ab3","source":{"kind":"arxiv","id":"2603.07833","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.07833","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"arxiv_version","alias_value":"2603.07833v2","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.07833","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"pith_short_12","alias_value":"PBCCOPAWRYZS","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PBCCOPAWRYZSAK5L","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PBCCOPAW","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PBCCOPAWRYZSAK5LKT5OF6ADIK","target":"record","payload":{"canonical_record":{"source":{"id":"2603.07833","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-08T22:32:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"58cb096c3b848f4b6416f53316d28dfb6a73f1bbe740f237ca51e916c82e9bc5","abstract_canon_sha256":"0ca02a3b61d2a42b6a813a19748660ebb62740cc78a6b57920cdd07b02ac0ab8"},"schema_version":"1.0"},"canonical_sha256":"7844273c168e33202bab54fae2f80342b2a19f4e770f4ac12d3443bd35f28ab3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:59.722582Z","signature_b64":"U1+s1GCuGYTRrz1l3ibno0iuj99Q49EQAvcjNjB+hH/j5MWOaMzq/buu03h1GjdGMn2OMaalhbVBR1MOobyKBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7844273c168e33202bab54fae2f80342b2a19f4e770f4ac12d3443bd35f28ab3","last_reissued_at":"2026-05-17T23:38:59.721803Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:59.721803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.07833","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ERsvdqftCJdzsOejwkzvq4cHrruPDH4eQcJd4TiVh0DZLQxjQFPmJChBsac/3XRBN4/k4irDHOcqbKxo3uu6DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T21:57:29.256266Z"},"content_sha256":"49f0a37bb263a48bac28bcda41962de2db226d3618577fc8f2430a6cbd0601a3","schema_version":"1.0","event_id":"sha256:49f0a37bb263a48bac28bcda41962de2db226d3618577fc8f2430a6cbd0601a3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PBCCOPAWRYZSAK5LKT5OF6ADIK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Gradient Iterated Temporal-Difference Learning","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"Gradient Iterated Temporal-Difference learning takes full gradients through moving targets to match semi-gradient speeds on Atari and other benchmarks.","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Adam White, Carlo D'Eramo, Habib Maraqten, Jan Peters, Kevin Gerhardt, Martha White, Th\\'eo Vincent, Yogesh Tripathi","submitted_at":"2026-03-08T22:32:15Z","abstract_excerpt":"Temporal-difference (TD) learning is highly effective at controlling and evaluating an agent's long-term outcomes. Most approaches in this paradigm implement a semi-gradient update to boost the learning speed, which consists of ignoring the gradient of the bootstrapped estimate. While popular, this type of update is prone to divergence, as Baird's counterexample illustrates. Gradient TD methods were introduced to overcome this issue, but have not been widely used, potentially due to issues with learning speed compared to semi-gradient methods. Recently, iterated TD learning was developed to in"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our evaluation reveals that this algorithm, called Gradient Iterated Temporal-Difference learning, has a competitive learning speed against semi-gradient methods across various benchmarks, including Atari games, a result that no prior work on gradient TD methods has demonstrated.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That computing gradients through the sequence of moving targets in iterated TD will not introduce new instabilities or require prohibitive extra computation that negates the speed gains.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Gradient Iterated TD learning stabilizes iterated TD by computing gradients over moving targets and achieves competitive speed to semi-gradient methods on Atari games and other benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Gradient Iterated Temporal-Difference learning takes full gradients through moving targets to match semi-gradient speeds on Atari and other benchmarks.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"28416d077a601e7aab212e9854663e7e318eb3791c667fdf6faf457df2b0cd68"},"source":{"id":"2603.07833","kind":"arxiv","version":2},"verdict":{"id":"a47fb471-0552-402c-b39a-fcf394f14ba4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T14:15:13.282201Z","strongest_claim":"Our evaluation reveals that this algorithm, called Gradient Iterated Temporal-Difference learning, has a competitive learning speed against semi-gradient methods across various benchmarks, including Atari games, a result that no prior work on gradient TD methods has demonstrated.","one_line_summary":"Gradient Iterated TD learning stabilizes iterated TD by computing gradients over moving targets and achieves competitive speed to semi-gradient methods on Atari games and other benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That computing gradients through the sequence of moving targets in iterated TD will not introduce new instabilities or require prohibitive extra computation that negates the speed gains.","pith_extraction_headline":"Gradient Iterated Temporal-Difference learning takes full gradients through moving targets to match semi-gradient speeds on Atari and other benchmarks."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"eb5201b8cbb334641979ed3715df32942c777919b068e653ebde9158af3b7a27"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"a47fb471-0552-402c-b39a-fcf394f14ba4"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wuEXOazz1wq1CGhMGU1IkLrTP03EhZXUppPBclTd16eXhPAZomyIsHxGniR9v0SOj/s0mSgyiebEidZC9GWLCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T21:57:29.256857Z"},"content_sha256":"e7cf104a91a7a174abb4900be4677d575eabaf7ae21d7e92a3490db1371a5ba3","schema_version":"1.0","event_id":"sha256:e7cf104a91a7a174abb4900be4677d575eabaf7ae21d7e92a3490db1371a5ba3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK/bundle.json","state_url":"https://pith.science/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T21:57:29Z","links":{"resolver":"https://pith.science/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK","bundle":"https://pith.science/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK/bundle.json","state":"https://pith.science/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PBCCOPAWRYZSAK5LKT5OF6ADIK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PBCCOPAWRYZSAK5LKT5OF6ADIK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0ca02a3b61d2a42b6a813a19748660ebb62740cc78a6b57920cdd07b02ac0ab8","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-08T22:32:15Z","title_canon_sha256":"58cb096c3b848f4b6416f53316d28dfb6a73f1bbe740f237ca51e916c82e9bc5"},"schema_version":"1.0","source":{"id":"2603.07833","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.07833","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"arxiv_version","alias_value":"2603.07833v2","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.07833","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"pith_short_12","alias_value":"PBCCOPAWRYZS","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PBCCOPAWRYZSAK5L","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PBCCOPAW","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:e7cf104a91a7a174abb4900be4677d575eabaf7ae21d7e92a3490db1371a5ba3","target":"graph","created_at":"2026-05-17T23:38:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our evaluation reveals that this algorithm, called Gradient Iterated Temporal-Difference learning, has a competitive learning speed against semi-gradient methods across various benchmarks, including Atari games, a result that no prior work on gradient TD methods has demonstrated."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That computing gradients through the sequence of moving targets in iterated TD will not introduce new instabilities or require prohibitive extra computation that negates the speed gains."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Gradient Iterated TD learning stabilizes iterated TD by computing gradients over moving targets and achieves competitive speed to semi-gradient methods on Atari games and other benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Gradient Iterated Temporal-Difference learning takes full gradients through moving targets to match semi-gradient speeds on Atari and other benchmarks."}],"snapshot_sha256":"28416d077a601e7aab212e9854663e7e318eb3791c667fdf6faf457df2b0cd68"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"eb5201b8cbb334641979ed3715df32942c777919b068e653ebde9158af3b7a27"},"paper":{"abstract_excerpt":"Temporal-difference (TD) learning is highly effective at controlling and evaluating an agent's long-term outcomes. Most approaches in this paradigm implement a semi-gradient update to boost the learning speed, which consists of ignoring the gradient of the bootstrapped estimate. While popular, this type of update is prone to divergence, as Baird's counterexample illustrates. Gradient TD methods were introduced to overcome this issue, but have not been widely used, potentially due to issues with learning speed compared to semi-gradient methods. Recently, iterated TD learning was developed to in","authors_text":"Adam White, Carlo D'Eramo, Habib Maraqten, Jan Peters, Kevin Gerhardt, Martha White, Th\\'eo Vincent, Yogesh Tripathi","cross_cats":["cs.AI"],"headline":"Gradient Iterated Temporal-Difference learning takes full gradients through moving targets to match semi-gradient speeds on Atari and other benchmarks.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-08T22:32:15Z","title":"Gradient Iterated Temporal-Difference Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.07833","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T14:15:13.282201Z","id":"a47fb471-0552-402c-b39a-fcf394f14ba4","model_set":{"reader":"grok-4.3"},"one_line_summary":"Gradient Iterated TD learning stabilizes iterated TD by computing gradients over moving targets and achieves competitive speed to semi-gradient methods on Atari games and other benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Gradient Iterated Temporal-Difference learning takes full gradients through moving targets to match semi-gradient speeds on Atari and other benchmarks.","strongest_claim":"Our evaluation reveals that this algorithm, called Gradient Iterated Temporal-Difference learning, has a competitive learning speed against semi-gradient methods across various benchmarks, including Atari games, a result that no prior work on gradient TD methods has demonstrated.","weakest_assumption":"That computing gradients through the sequence of moving targets in iterated TD will not introduce new instabilities or require prohibitive extra computation that negates the speed gains."}},"verdict_id":"a47fb471-0552-402c-b39a-fcf394f14ba4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:49f0a37bb263a48bac28bcda41962de2db226d3618577fc8f2430a6cbd0601a3","target":"record","created_at":"2026-05-17T23:38:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0ca02a3b61d2a42b6a813a19748660ebb62740cc78a6b57920cdd07b02ac0ab8","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-08T22:32:15Z","title_canon_sha256":"58cb096c3b848f4b6416f53316d28dfb6a73f1bbe740f237ca51e916c82e9bc5"},"schema_version":"1.0","source":{"id":"2603.07833","kind":"arxiv","version":2}},"canonical_sha256":"7844273c168e33202bab54fae2f80342b2a19f4e770f4ac12d3443bd35f28ab3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7844273c168e33202bab54fae2f80342b2a19f4e770f4ac12d3443bd35f28ab3","first_computed_at":"2026-05-17T23:38:59.721803Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:59.721803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"U1+s1GCuGYTRrz1l3ibno0iuj99Q49EQAvcjNjB+hH/j5MWOaMzq/buu03h1GjdGMn2OMaalhbVBR1MOobyKBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:59.722582Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.07833","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:49f0a37bb263a48bac28bcda41962de2db226d3618577fc8f2430a6cbd0601a3","sha256:e7cf104a91a7a174abb4900be4677d575eabaf7ae21d7e92a3490db1371a5ba3"],"state_sha256":"b4bf6577950f59e76be8772b9f72aff7a68a3fa6e329c978c7c1300bae0965de"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3XmGTGNbyIHq+2cuqsdE8nmgQhw+XqLYSXO42oKdpCCSjooXT8qnd0O60zfV2eKsSXuErbi9iEeh/Z9HGSznDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T21:57:29.259910Z","bundle_sha256":"936bc5393d186ed59dd3d24d8e29fd55a2bbf0693881b1c4c84a5e325cb83d43"}}