{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:55DC4IM62JYA3BVF4O7KJBMMDU","short_pith_number":"pith:55DC4IM6","canonical_record":{"source":{"id":"1804.03758","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-04-11T00:06:36Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"3187ef8bf6acf8d626e10e0783ab76f70985a0505cccb277efa29be325363671","abstract_canon_sha256":"8d9899d3fa753f9521665d735723a12c8e2e6b30b4b582e4dbb5e04fd1d9675f"},"schema_version":"1.0"},"canonical_sha256":"ef462e219ed2700d86a5e3bea4858c1d2bdf5b126cede567aa0ef795b9662953","source":{"kind":"arxiv","id":"1804.03758","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.03758","created_at":"2026-05-18T00:18:43Z"},{"alias_kind":"arxiv_version","alias_value":"1804.03758v1","created_at":"2026-05-18T00:18:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.03758","created_at":"2026-05-18T00:18:43Z"},{"alias_kind":"pith_short_12","alias_value":"55DC4IM62JYA","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"55DC4IM62JYA3BVF","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"55DC4IM6","created_at":"2026-05-18T12:32:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:55DC4IM62JYA3BVF4O7KJBMMDU","target":"record","payload":{"canonical_record":{"source":{"id":"1804.03758","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-04-11T00:06:36Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"3187ef8bf6acf8d626e10e0783ab76f70985a0505cccb277efa29be325363671","abstract_canon_sha256":"8d9899d3fa753f9521665d735723a12c8e2e6b30b4b582e4dbb5e04fd1d9675f"},"schema_version":"1.0"},"canonical_sha256":"ef462e219ed2700d86a5e3bea4858c1d2bdf5b126cede567aa0ef795b9662953","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:18:43.142798Z","signature_b64":"sSfnhYtuByF98PmzmptgtRyzHv7JPtoC5Fn7z8pUwxR4QNw5B8GvUrU4ScElZ0Mf3NqBAiSG0AK9ttvyIEIuBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ef462e219ed2700d86a5e3bea4858c1d2bdf5b126cede567aa0ef795b9662953","last_reissued_at":"2026-05-18T00:18:43.142117Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:18:43.142117Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.03758","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Zo3/6T7T+lrh5p7pCuiKGppM1bhcsXpqnzYvvyXQTgZ0LSC4SdjGCLgh41TNheDm/wZvzpqeSYYiDR1Gak3uCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T05:24:22.497888Z"},"content_sha256":"a63afc3e1a9aeee730a5a43fa6d9b99b834c74ea82fd97302377743dcf9b7b03","schema_version":"1.0","event_id":"sha256:a63afc3e1a9aeee730a5a43fa6d9b99b834c74ea82fd97302377743dcf9b7b03"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:55DC4IM62JYA3BVF4O7KJBMMDU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Universal Successor Representations for Transfer Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.AI","authors_text":"Chen Ma, Junfeng Wen, Yoshua Bengio","submitted_at":"2018-04-11T00:06:36Z","abstract_excerpt":"The objective of transfer reinforcement learning is to generalize from a set of previous tasks to unseen new tasks. In this work, we focus on the transfer scenario where the dynamics among tasks are the same, but their goals differ. Although general value function (Sutton et al., 2011) has been shown to be useful for knowledge transfer, learning a universal value function can be challenging in practice. To attack this, we propose (1) to use universal successor representations (USR) to represent the transferable knowledge and (2) a USR approximator (USRA) that can be trained by interacting with"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.03758","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RZ6dTniu6R3toEd75X1fNm2FNImuCMuFkEtDGPd16bVzq7eyVpRIbgNPpP1z5wmsiz23AifNQUYQdzI32HLWDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T05:24:22.498612Z"},"content_sha256":"1f35a9461046e725629032db1e08460ad78c1a926a0cc84f1d7ccfa4ddf6e45f","schema_version":"1.0","event_id":"sha256:1f35a9461046e725629032db1e08460ad78c1a926a0cc84f1d7ccfa4ddf6e45f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/55DC4IM62JYA3BVF4O7KJBMMDU/bundle.json","state_url":"https://pith.science/pith/55DC4IM62JYA3BVF4O7KJBMMDU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/55DC4IM62JYA3BVF4O7KJBMMDU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T05:24:22Z","links":{"resolver":"https://pith.science/pith/55DC4IM62JYA3BVF4O7KJBMMDU","bundle":"https://pith.science/pith/55DC4IM62JYA3BVF4O7KJBMMDU/bundle.json","state":"https://pith.science/pith/55DC4IM62JYA3BVF4O7KJBMMDU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/55DC4IM62JYA3BVF4O7KJBMMDU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:55DC4IM62JYA3BVF4O7KJBMMDU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8d9899d3fa753f9521665d735723a12c8e2e6b30b4b582e4dbb5e04fd1d9675f","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-04-11T00:06:36Z","title_canon_sha256":"3187ef8bf6acf8d626e10e0783ab76f70985a0505cccb277efa29be325363671"},"schema_version":"1.0","source":{"id":"1804.03758","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.03758","created_at":"2026-05-18T00:18:43Z"},{"alias_kind":"arxiv_version","alias_value":"1804.03758v1","created_at":"2026-05-18T00:18:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.03758","created_at":"2026-05-18T00:18:43Z"},{"alias_kind":"pith_short_12","alias_value":"55DC4IM62JYA","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"55DC4IM62JYA3BVF","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"55DC4IM6","created_at":"2026-05-18T12:32:05Z"}],"graph_snapshots":[{"event_id":"sha256:1f35a9461046e725629032db1e08460ad78c1a926a0cc84f1d7ccfa4ddf6e45f","target":"graph","created_at":"2026-05-18T00:18:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The objective of transfer reinforcement learning is to generalize from a set of previous tasks to unseen new tasks. In this work, we focus on the transfer scenario where the dynamics among tasks are the same, but their goals differ. Although general value function (Sutton et al., 2011) has been shown to be useful for knowledge transfer, learning a universal value function can be challenging in practice. To attack this, we propose (1) to use universal successor representations (USR) to represent the transferable knowledge and (2) a USR approximator (USRA) that can be trained by interacting with","authors_text":"Chen Ma, Junfeng Wen, Yoshua Bengio","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-04-11T00:06:36Z","title":"Universal Successor Representations for Transfer Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.03758","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a63afc3e1a9aeee730a5a43fa6d9b99b834c74ea82fd97302377743dcf9b7b03","target":"record","created_at":"2026-05-18T00:18:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8d9899d3fa753f9521665d735723a12c8e2e6b30b4b582e4dbb5e04fd1d9675f","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-04-11T00:06:36Z","title_canon_sha256":"3187ef8bf6acf8d626e10e0783ab76f70985a0505cccb277efa29be325363671"},"schema_version":"1.0","source":{"id":"1804.03758","kind":"arxiv","version":1}},"canonical_sha256":"ef462e219ed2700d86a5e3bea4858c1d2bdf5b126cede567aa0ef795b9662953","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ef462e219ed2700d86a5e3bea4858c1d2bdf5b126cede567aa0ef795b9662953","first_computed_at":"2026-05-18T00:18:43.142117Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:18:43.142117Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sSfnhYtuByF98PmzmptgtRyzHv7JPtoC5Fn7z8pUwxR4QNw5B8GvUrU4ScElZ0Mf3NqBAiSG0AK9ttvyIEIuBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:18:43.142798Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.03758","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a63afc3e1a9aeee730a5a43fa6d9b99b834c74ea82fd97302377743dcf9b7b03","sha256:1f35a9461046e725629032db1e08460ad78c1a926a0cc84f1d7ccfa4ddf6e45f"],"state_sha256":"a58e5d548631a1f2daf316ec15b1695e54e629e795c22fe6e66e73fb1810a47a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VmhIJkKnQiJqq6C34ucLq658Av1wNeJEGJzeHMrUn7kpHfc+vAdinziDoThOCH/t/ectFCZ+OgQPNAI2nVjpBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T05:24:22.503037Z","bundle_sha256":"21b16418f82d96d29ce448413ff69c74f24ebbc4f239b9e257a347f56401cff9"}}