{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:FYTINHHO7IQUINEJSGMVT2567V","short_pith_number":"pith:FYTINHHO","schema_version":"1.0","canonical_sha256":"2e26869ceefa21443489919959ebbefd7030dd357b09d69b38330510b8216e6e","source":{"kind":"arxiv","id":"1608.05151","version":1},"attestation_state":"computed","paper":{"title":"Effective Multi-step Temporal-Difference Learning for Non-Linear Function Approximation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Harm van Seijen","submitted_at":"2016-08-18T01:21:27Z","abstract_excerpt":"Multi-step temporal-difference (TD) learning, where the update targets contain information from multiple time steps ahead, is one of the most popular forms of TD learning for linear function approximation. The reason is that multi-step methods often yield substantially better performance than their single-step counter-parts, due to a lower bias of the update targets. For non-linear function approximation, however, single-step methods appear to be the norm. Part of the reason could be that on many domains the popular multi-step methods TD($\\lambda$) and Sarsa($\\lambda$) do not perform well when"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1608.05151","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-08-18T01:21:27Z","cross_cats_sorted":[],"title_canon_sha256":"f768f59dbe231dbdce1e03483f6c1d8c864758d092f05f6ba6834587eede78c9","abstract_canon_sha256":"7a306dce63fc2c7ee73685a53a1f79b0cad086c82be4812bb99e20b9114eb844"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:08:31.790252Z","signature_b64":"EK/T4mQKluCqIQEBknPup6DKCwBgojnkwh6rPVAP47Hg+exzLkMY6PSAzhz4bLR4apuYTUqYrxQhveTEoufDCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e26869ceefa21443489919959ebbefd7030dd357b09d69b38330510b8216e6e","last_reissued_at":"2026-05-18T01:08:31.789821Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:08:31.789821Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Effective Multi-step Temporal-Difference Learning for Non-Linear Function Approximation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Harm van Seijen","submitted_at":"2016-08-18T01:21:27Z","abstract_excerpt":"Multi-step temporal-difference (TD) learning, where the update targets contain information from multiple time steps ahead, is one of the most popular forms of TD learning for linear function approximation. The reason is that multi-step methods often yield substantially better performance than their single-step counter-parts, due to a lower bias of the update targets. For non-linear function approximation, however, single-step methods appear to be the norm. Part of the reason could be that on many domains the popular multi-step methods TD($\\lambda$) and Sarsa($\\lambda$) do not perform well when"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1608.05151","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1608.05151","created_at":"2026-05-18T01:08:31.789893+00:00"},{"alias_kind":"arxiv_version","alias_value":"1608.05151v1","created_at":"2026-05-18T01:08:31.789893+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1608.05151","created_at":"2026-05-18T01:08:31.789893+00:00"},{"alias_kind":"pith_short_12","alias_value":"FYTINHHO7IQU","created_at":"2026-05-18T12:30:15.759754+00:00"},{"alias_kind":"pith_short_16","alias_value":"FYTINHHO7IQUINEJ","created_at":"2026-05-18T12:30:15.759754+00:00"},{"alias_kind":"pith_short_8","alias_value":"FYTINHHO","created_at":"2026-05-18T12:30:15.759754+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V","json":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V.json","graph_json":"https://pith.science/api/pith-number/FYTINHHO7IQUINEJSGMVT2567V/graph.json","events_json":"https://pith.science/api/pith-number/FYTINHHO7IQUINEJSGMVT2567V/events.json","paper":"https://pith.science/paper/FYTINHHO"},"agent_actions":{"view_html":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V","download_json":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V.json","view_paper":"https://pith.science/paper/FYTINHHO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1608.05151&json=true","fetch_graph":"https://pith.science/api/pith-number/FYTINHHO7IQUINEJSGMVT2567V/graph.json","fetch_events":"https://pith.science/api/pith-number/FYTINHHO7IQUINEJSGMVT2567V/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V/action/storage_attestation","attest_author":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V/action/author_attestation","sign_citation":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V/action/citation_signature","submit_replication":"https://pith.science/pith/FYTINHHO7IQUINEJSGMVT2567V/action/replication_record"}},"created_at":"2026-05-18T01:08:31.789893+00:00","updated_at":"2026-05-18T01:08:31.789893+00:00"}