{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:N5EBY4PKS5EGOLKFK7DVBQT2MF","short_pith_number":"pith:N5EBY4PK","canonical_record":{"source":{"id":"1907.04651","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-05T22:33:36Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"6155d1ae098c17e60c63c62b1217a6ec4f33dfa8f349acc9ca03b868bd609033","abstract_canon_sha256":"cfa265ec0208854707f0ac568c2763e892aefa16ef48fae4c45a6491ead85cd5"},"schema_version":"1.0"},"canonical_sha256":"6f481c71ea9748672d4557c750c27a616f5f4cd646731c0c990db88f264eb603","source":{"kind":"arxiv","id":"1907.04651","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.04651","created_at":"2026-05-17T23:40:57Z"},{"alias_kind":"arxiv_version","alias_value":"1907.04651v1","created_at":"2026-05-17T23:40:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.04651","created_at":"2026-05-17T23:40:57Z"},{"alias_kind":"pith_short_12","alias_value":"N5EBY4PKS5EG","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"N5EBY4PKS5EGOLKF","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"N5EBY4PK","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:N5EBY4PKS5EGOLKFK7DVBQT2MF","target":"record","payload":{"canonical_record":{"source":{"id":"1907.04651","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-05T22:33:36Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"6155d1ae098c17e60c63c62b1217a6ec4f33dfa8f349acc9ca03b868bd609033","abstract_canon_sha256":"cfa265ec0208854707f0ac568c2763e892aefa16ef48fae4c45a6491ead85cd5"},"schema_version":"1.0"},"canonical_sha256":"6f481c71ea9748672d4557c750c27a616f5f4cd646731c0c990db88f264eb603","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:40:57.216253Z","signature_b64":"UBY2teT1y5BDEMKTBlDCduN4OU7KTzhtTQ4+sddJDw0CU0fWDgnLrYdtfVgTiM23unJnnR2JClVmlBd4tZApCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6f481c71ea9748672d4557c750c27a616f5f4cd646731c0c990db88f264eb603","last_reissued_at":"2026-05-17T23:40:57.215725Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:40:57.215725Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.04651","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:40:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TV3sDTMboeFJ9UHbPyzQ84hImeKJx9gXe7xmcQL93ItySI1MnK5xTRuh/a/9YbILS2M2p79q4sRae2ra0FoSAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T16:38:24.382694Z"},"content_sha256":"0e069b6c0d33043b76b16c8b59a809b4198bb3d2f7ae4fe8563145842528a1b2","schema_version":"1.0","event_id":"sha256:0e069b6c0d33043b76b16c8b59a809b4198bb3d2f7ae4fe8563145842528a1b2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:N5EBY4PKS5EGOLKFK7DVBQT2MF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Incrementally Learning Functions of the Return","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Brendan Bennett, Muhammad Zaheer, Vincent Liu, Wesley Chung","submitted_at":"2019-07-05T22:33:36Z","abstract_excerpt":"Temporal difference methods enable efficient estimation of value functions in reinforcement learning in an incremental fashion, and are of broader interest because they correspond learning as observed in biological systems. Standard value functions correspond to the expected value of a sum of discounted returns. While this formulation is often sufficient for many purposes, it would often be useful to be able to represent functions of the return as well. Unfortunately, most such functions cannot be estimated directly using TD methods. We propose a means of estimating functions of the return usi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.04651","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:40:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LymV9mt3uCeRahZogyPkphoV23QxNJZXdl5o6L2xF4tZL7zX/e4BwFzvIT2gd7C+Nurf52q6YycaSJObwrnhCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T16:38:24.383045Z"},"content_sha256":"905e898a3f26d65aeef260116c7292014ef825c02384fce496c52a8672b10c41","schema_version":"1.0","event_id":"sha256:905e898a3f26d65aeef260116c7292014ef825c02384fce496c52a8672b10c41"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF/bundle.json","state_url":"https://pith.science/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-20T16:38:24Z","links":{"resolver":"https://pith.science/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF","bundle":"https://pith.science/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF/bundle.json","state":"https://pith.science/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/N5EBY4PKS5EGOLKFK7DVBQT2MF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:N5EBY4PKS5EGOLKFK7DVBQT2MF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cfa265ec0208854707f0ac568c2763e892aefa16ef48fae4c45a6491ead85cd5","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-05T22:33:36Z","title_canon_sha256":"6155d1ae098c17e60c63c62b1217a6ec4f33dfa8f349acc9ca03b868bd609033"},"schema_version":"1.0","source":{"id":"1907.04651","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.04651","created_at":"2026-05-17T23:40:57Z"},{"alias_kind":"arxiv_version","alias_value":"1907.04651v1","created_at":"2026-05-17T23:40:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.04651","created_at":"2026-05-17T23:40:57Z"},{"alias_kind":"pith_short_12","alias_value":"N5EBY4PKS5EG","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"N5EBY4PKS5EGOLKF","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"N5EBY4PK","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:905e898a3f26d65aeef260116c7292014ef825c02384fce496c52a8672b10c41","target":"graph","created_at":"2026-05-17T23:40:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Temporal difference methods enable efficient estimation of value functions in reinforcement learning in an incremental fashion, and are of broader interest because they correspond learning as observed in biological systems. Standard value functions correspond to the expected value of a sum of discounted returns. While this formulation is often sufficient for many purposes, it would often be useful to be able to represent functions of the return as well. Unfortunately, most such functions cannot be estimated directly using TD methods. We propose a means of estimating functions of the return usi","authors_text":"Brendan Bennett, Muhammad Zaheer, Vincent Liu, Wesley Chung","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-05T22:33:36Z","title":"Incrementally Learning Functions of the Return"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.04651","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0e069b6c0d33043b76b16c8b59a809b4198bb3d2f7ae4fe8563145842528a1b2","target":"record","created_at":"2026-05-17T23:40:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cfa265ec0208854707f0ac568c2763e892aefa16ef48fae4c45a6491ead85cd5","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-05T22:33:36Z","title_canon_sha256":"6155d1ae098c17e60c63c62b1217a6ec4f33dfa8f349acc9ca03b868bd609033"},"schema_version":"1.0","source":{"id":"1907.04651","kind":"arxiv","version":1}},"canonical_sha256":"6f481c71ea9748672d4557c750c27a616f5f4cd646731c0c990db88f264eb603","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6f481c71ea9748672d4557c750c27a616f5f4cd646731c0c990db88f264eb603","first_computed_at":"2026-05-17T23:40:57.215725Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:40:57.215725Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UBY2teT1y5BDEMKTBlDCduN4OU7KTzhtTQ4+sddJDw0CU0fWDgnLrYdtfVgTiM23unJnnR2JClVmlBd4tZApCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:40:57.216253Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.04651","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0e069b6c0d33043b76b16c8b59a809b4198bb3d2f7ae4fe8563145842528a1b2","sha256:905e898a3f26d65aeef260116c7292014ef825c02384fce496c52a8672b10c41"],"state_sha256":"8f51b9bfefbd0f4b358c094de3dc12a5e2a833db8699e91fc9993d04fbdc188b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TSXIkBB4Yv/uiw8cOXMGivhr8CdZmnkZ2IEumkLiYiap6wwE6M/rjwSKGH2nscu37MO+1T5lZwpl1ctFmJvCAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-20T16:38:24.385249Z","bundle_sha256":"dddfbe8912694b363e49d27979fe8798b6f8e305d42d110465622c9d5410b6c6"}}