{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:NGR2U4LAMPX7YMTZMBPQOKC7KS","short_pith_number":"pith:NGR2U4LA","canonical_record":{"source":{"id":"1902.01883","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-05T19:45:08Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"d5178c5538ccf858938fafa33aa4992765388c82477036c3db7cf9d92c472b6c","abstract_canon_sha256":"6a1bd1431160a34a58cf78286f3b6ece3eb385b049f192f96ab811433a4321eb"},"schema_version":"1.0"},"canonical_sha256":"69a3aa716063effc3279605f07285f54a95db3cf4097aa649b545c4cf54fc975","source":{"kind":"arxiv","id":"1902.01883","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.01883","created_at":"2026-05-17T23:45:09Z"},{"alias_kind":"arxiv_version","alias_value":"1902.01883v3","created_at":"2026-05-17T23:45:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.01883","created_at":"2026-05-17T23:45:09Z"},{"alias_kind":"pith_short_12","alias_value":"NGR2U4LAMPX7","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"NGR2U4LAMPX7YMTZ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"NGR2U4LA","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:NGR2U4LAMPX7YMTZMBPQOKC7KS","target":"record","payload":{"canonical_record":{"source":{"id":"1902.01883","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-05T19:45:08Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"d5178c5538ccf858938fafa33aa4992765388c82477036c3db7cf9d92c472b6c","abstract_canon_sha256":"6a1bd1431160a34a58cf78286f3b6ece3eb385b049f192f96ab811433a4321eb"},"schema_version":"1.0"},"canonical_sha256":"69a3aa716063effc3279605f07285f54a95db3cf4097aa649b545c4cf54fc975","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:09.220605Z","signature_b64":"WoIHlEb571+MMLq29fnWRQUKgnVVnmfE+i9gvHrwHAFC9a3xWHbnhME9GW48/zwz4gK7o9/1ph9C0jWbpBnxAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"69a3aa716063effc3279605f07285f54a95db3cf4097aa649b545c4cf54fc975","last_reissued_at":"2026-05-17T23:45:09.219872Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:09.219872Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1902.01883","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"t8PirVLBcsYIOzpH0ulVZ6kfPdo8nFWF9Qtf9TeeAqgnOTWBvEZXupY2PeyfPUkVaPpOBAOQyC/S0GFWd+AiDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T02:41:30.930481Z"},"content_sha256":"1eb9feb7b0e329222b43aebc4850dcc68e1b8e5e9f01b942754f2e84fd8fb45e","schema_version":"1.0","event_id":"sha256:1eb9feb7b0e329222b43aebc4850dcc68e1b8e5e9f01b942754f2e84fd8fb45e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:NGR2U4LAMPX7YMTZMBPQOKC7KS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Separating value functions across time-scales","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ahmed Touati, Emma Brunskill, Joelle Pineau, Joshua Romoff, Peter Henderson, Yann Ollivier","submitted_at":"2019-02-05T19:45:08Z","abstract_excerpt":"In many finite horizon episodic reinforcement learning (RL) settings, it is desirable to optimize for the undiscounted return - in settings like Atari, for instance, the goal is to collect the most points while staying alive in the long run. Yet, it may be difficult (or even intractable) mathematically to learn with this target. As such, temporal discounting is often applied to optimize over a shorter effective planning horizon. This comes at the risk of potentially biasing the optimization target away from the undiscounted goal. In settings where this bias is unacceptable - where the system m"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.01883","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SrNOlviszOl8IOJ9ShtLkKBSjTdw72zzzVQ40v1XIrnZgwll0+nn2DiwEKLzNi0lj4nk1iXgJFqCIqAQU8gsCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T02:41:30.930829Z"},"content_sha256":"88b97bd77818ed41ae7bce6b28f342fa6f8dc665cc33955d4549ddb78dbe8dc4","schema_version":"1.0","event_id":"sha256:88b97bd77818ed41ae7bce6b28f342fa6f8dc665cc33955d4549ddb78dbe8dc4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS/bundle.json","state_url":"https://pith.science/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T02:41:30Z","links":{"resolver":"https://pith.science/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS","bundle":"https://pith.science/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS/bundle.json","state":"https://pith.science/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NGR2U4LAMPX7YMTZMBPQOKC7KS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:NGR2U4LAMPX7YMTZMBPQOKC7KS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6a1bd1431160a34a58cf78286f3b6ece3eb385b049f192f96ab811433a4321eb","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-05T19:45:08Z","title_canon_sha256":"d5178c5538ccf858938fafa33aa4992765388c82477036c3db7cf9d92c472b6c"},"schema_version":"1.0","source":{"id":"1902.01883","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.01883","created_at":"2026-05-17T23:45:09Z"},{"alias_kind":"arxiv_version","alias_value":"1902.01883v3","created_at":"2026-05-17T23:45:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.01883","created_at":"2026-05-17T23:45:09Z"},{"alias_kind":"pith_short_12","alias_value":"NGR2U4LAMPX7","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"NGR2U4LAMPX7YMTZ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"NGR2U4LA","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:88b97bd77818ed41ae7bce6b28f342fa6f8dc665cc33955d4549ddb78dbe8dc4","target":"graph","created_at":"2026-05-17T23:45:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In many finite horizon episodic reinforcement learning (RL) settings, it is desirable to optimize for the undiscounted return - in settings like Atari, for instance, the goal is to collect the most points while staying alive in the long run. Yet, it may be difficult (or even intractable) mathematically to learn with this target. As such, temporal discounting is often applied to optimize over a shorter effective planning horizon. This comes at the risk of potentially biasing the optimization target away from the undiscounted goal. In settings where this bias is unacceptable - where the system m","authors_text":"Ahmed Touati, Emma Brunskill, Joelle Pineau, Joshua Romoff, Peter Henderson, Yann Ollivier","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-05T19:45:08Z","title":"Separating value functions across time-scales"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.01883","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1eb9feb7b0e329222b43aebc4850dcc68e1b8e5e9f01b942754f2e84fd8fb45e","target":"record","created_at":"2026-05-17T23:45:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6a1bd1431160a34a58cf78286f3b6ece3eb385b049f192f96ab811433a4321eb","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-05T19:45:08Z","title_canon_sha256":"d5178c5538ccf858938fafa33aa4992765388c82477036c3db7cf9d92c472b6c"},"schema_version":"1.0","source":{"id":"1902.01883","kind":"arxiv","version":3}},"canonical_sha256":"69a3aa716063effc3279605f07285f54a95db3cf4097aa649b545c4cf54fc975","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"69a3aa716063effc3279605f07285f54a95db3cf4097aa649b545c4cf54fc975","first_computed_at":"2026-05-17T23:45:09.219872Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:45:09.219872Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WoIHlEb571+MMLq29fnWRQUKgnVVnmfE+i9gvHrwHAFC9a3xWHbnhME9GW48/zwz4gK7o9/1ph9C0jWbpBnxAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:45:09.220605Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.01883","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1eb9feb7b0e329222b43aebc4850dcc68e1b8e5e9f01b942754f2e84fd8fb45e","sha256:88b97bd77818ed41ae7bce6b28f342fa6f8dc665cc33955d4549ddb78dbe8dc4"],"state_sha256":"88622313388c10c3f4f8c3121d93d6a5f009136afe87b183746d3769b6172149"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"izA8LOIt6lDsUaBFc5NTQpTjBgZbyxyRF8CsGT6kDmRguWxF4DSZKX3BzmiOPaNKFGCZa50AdeahdxUdCsKBBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T02:41:30.932792Z","bundle_sha256":"a4b3c3abd384a6b34a936a5bbd171d65541d6b8ffe8f76447d67689879557f48"}}