{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:OSKCVIIOQUP26OOQ67OAIGQ7DX","short_pith_number":"pith:OSKCVIIO","canonical_record":{"source":{"id":"1907.10827","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-24T16:26:21Z","cross_cats_sorted":["cs.MA","stat.ML"],"title_canon_sha256":"199f55bd61a8fad3911c457ca35387d7800e090524d41faa045951213d1c705f","abstract_canon_sha256":"37851cb08a17fa47038c149c0c1ed64472ad022246952b149702fa24a8fa3c5a"},"schema_version":"1.0"},"canonical_sha256":"74942aa10e851faf39d0f7dc041a1f1dd30bde388a7885e0a74325a0ff66b757","source":{"kind":"arxiv","id":"1907.10827","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.10827","created_at":"2026-05-17T23:39:34Z"},{"alias_kind":"arxiv_version","alias_value":"1907.10827v1","created_at":"2026-05-17T23:39:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.10827","created_at":"2026-05-17T23:39:34Z"},{"alias_kind":"pith_short_12","alias_value":"OSKCVIIOQUP2","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"OSKCVIIOQUP26OOQ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"OSKCVIIO","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:OSKCVIIOQUP26OOQ67OAIGQ7DX","target":"record","payload":{"canonical_record":{"source":{"id":"1907.10827","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-24T16:26:21Z","cross_cats_sorted":["cs.MA","stat.ML"],"title_canon_sha256":"199f55bd61a8fad3911c457ca35387d7800e090524d41faa045951213d1c705f","abstract_canon_sha256":"37851cb08a17fa47038c149c0c1ed64472ad022246952b149702fa24a8fa3c5a"},"schema_version":"1.0"},"canonical_sha256":"74942aa10e851faf39d0f7dc041a1f1dd30bde388a7885e0a74325a0ff66b757","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:34.396988Z","signature_b64":"tc66LSTAtVkORkgNvmC8N39gEq5biWnReV9mrQ3OPH/oeV46Zl06o5QAZz3gbrricfeV3XoNozJDCFo2v5wpDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"74942aa10e851faf39d0f7dc041a1f1dd30bde388a7885e0a74325a0ff66b757","last_reissued_at":"2026-05-17T23:39:34.396582Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:34.396582Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.10827","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HcPrBwwUrgPYts0IhDszwY5EeMBNF2xIVoUrK/1mOvch4rVIFbbOLJCHzltC3F7VghfFoetNHJF6iPwdQiG9Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T16:37:21.049455Z"},"content_sha256":"bc42840c2af2be4b3b799e3e6aed8b84b28c456002f86ebe0a31cda01ab444d2","schema_version":"1.0","event_id":"sha256:bc42840c2af2be4b3b799e3e6aed8b84b28c456002f86ebe0a31cda01ab444d2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:OSKCVIIOQUP26OOQ67OAIGQ7DX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Terminal Prediction as an Auxiliary Task for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.MA","stat.ML"],"primary_cat":"cs.LG","authors_text":"Bilal Kartal, Matthew E. Taylor, Pablo Hernandez-Leal","submitted_at":"2019-07-24T16:26:21Z","abstract_excerpt":"Deep reinforcement learning has achieved great successes in recent years, but there are still open challenges, such as convergence to locally optimal policies and sample inefficiency. In this paper, we contribute a novel self-supervised auxiliary task, i.e., Terminal Prediction (TP), estimating temporal closeness to terminal states for episodic tasks. The intuition is to help representation learning by letting the agent predict how close it is to a terminal state, while learning its control policy. Although TP could be integrated with multiple algorithms, this paper focuses on Asynchronous Adv"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.10827","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yRBaupXKU6BcJvIN3vj7iobNkayrmU2Jm5GBAyBKMVCa/IEwMBchg69QHl/JoOjyS76ERZFVZgi4jXpvhaeXAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T16:37:21.050175Z"},"content_sha256":"8ccc5206c8fdac9a08d8b64ebafbd17438591c10c30be56bd36294c5e647dcb6","schema_version":"1.0","event_id":"sha256:8ccc5206c8fdac9a08d8b64ebafbd17438591c10c30be56bd36294c5e647dcb6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX/bundle.json","state_url":"https://pith.science/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T16:37:21Z","links":{"resolver":"https://pith.science/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX","bundle":"https://pith.science/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX/bundle.json","state":"https://pith.science/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OSKCVIIOQUP26OOQ67OAIGQ7DX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:OSKCVIIOQUP26OOQ67OAIGQ7DX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"37851cb08a17fa47038c149c0c1ed64472ad022246952b149702fa24a8fa3c5a","cross_cats_sorted":["cs.MA","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-24T16:26:21Z","title_canon_sha256":"199f55bd61a8fad3911c457ca35387d7800e090524d41faa045951213d1c705f"},"schema_version":"1.0","source":{"id":"1907.10827","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.10827","created_at":"2026-05-17T23:39:34Z"},{"alias_kind":"arxiv_version","alias_value":"1907.10827v1","created_at":"2026-05-17T23:39:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.10827","created_at":"2026-05-17T23:39:34Z"},{"alias_kind":"pith_short_12","alias_value":"OSKCVIIOQUP2","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"OSKCVIIOQUP26OOQ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"OSKCVIIO","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:8ccc5206c8fdac9a08d8b64ebafbd17438591c10c30be56bd36294c5e647dcb6","target":"graph","created_at":"2026-05-17T23:39:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep reinforcement learning has achieved great successes in recent years, but there are still open challenges, such as convergence to locally optimal policies and sample inefficiency. In this paper, we contribute a novel self-supervised auxiliary task, i.e., Terminal Prediction (TP), estimating temporal closeness to terminal states for episodic tasks. The intuition is to help representation learning by letting the agent predict how close it is to a terminal state, while learning its control policy. Although TP could be integrated with multiple algorithms, this paper focuses on Asynchronous Adv","authors_text":"Bilal Kartal, Matthew E. Taylor, Pablo Hernandez-Leal","cross_cats":["cs.MA","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-24T16:26:21Z","title":"Terminal Prediction as an Auxiliary Task for Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.10827","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bc42840c2af2be4b3b799e3e6aed8b84b28c456002f86ebe0a31cda01ab444d2","target":"record","created_at":"2026-05-17T23:39:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"37851cb08a17fa47038c149c0c1ed64472ad022246952b149702fa24a8fa3c5a","cross_cats_sorted":["cs.MA","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-24T16:26:21Z","title_canon_sha256":"199f55bd61a8fad3911c457ca35387d7800e090524d41faa045951213d1c705f"},"schema_version":"1.0","source":{"id":"1907.10827","kind":"arxiv","version":1}},"canonical_sha256":"74942aa10e851faf39d0f7dc041a1f1dd30bde388a7885e0a74325a0ff66b757","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"74942aa10e851faf39d0f7dc041a1f1dd30bde388a7885e0a74325a0ff66b757","first_computed_at":"2026-05-17T23:39:34.396582Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:34.396582Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tc66LSTAtVkORkgNvmC8N39gEq5biWnReV9mrQ3OPH/oeV46Zl06o5QAZz3gbrricfeV3XoNozJDCFo2v5wpDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:34.396988Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.10827","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bc42840c2af2be4b3b799e3e6aed8b84b28c456002f86ebe0a31cda01ab444d2","sha256:8ccc5206c8fdac9a08d8b64ebafbd17438591c10c30be56bd36294c5e647dcb6"],"state_sha256":"7d403589f8af08275f2e48b1af231ef61e407392eaeba077906ce45720addba7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KMlR+DWNrIfCqx9MH2mOksGgJ3bSmJ2fn7HcoP2kqLzgV9xeXLuX92mQxlhcj5nDSGe2Zp0/a1/waV/ARxfaAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T16:37:21.055582Z","bundle_sha256":"85d2edf1ab7a2e544c876db6325888b2da6b4d1378c99946e0fa8774141de7a0"}}