{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:XMHR6VCGWTGG3ZUYS45DOWY43A","short_pith_number":"pith:XMHR6VCG","canonical_record":{"source":{"id":"1707.04175","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-13T15:24:20Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d496a9afd820c9d7a3996c5d5cce3b1cce15da9566200140bda2570facf6ec91","abstract_canon_sha256":"9232f8b0a8886166c52b1c9f27b5bf72ad31c44805770566173b2397ca3a02eb"},"schema_version":"1.0"},"canonical_sha256":"bb0f1f5446b4cc6de698973a375b1cd80ffd44982a8a6492254b7426249ce8b0","source":{"kind":"arxiv","id":"1707.04175","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.04175","created_at":"2026-05-18T00:40:20Z"},{"alias_kind":"arxiv_version","alias_value":"1707.04175v1","created_at":"2026-05-18T00:40:20Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.04175","created_at":"2026-05-18T00:40:20Z"},{"alias_kind":"pith_short_12","alias_value":"XMHR6VCGWTGG","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_16","alias_value":"XMHR6VCGWTGG3ZUY","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_8","alias_value":"XMHR6VCG","created_at":"2026-05-18T12:31:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:XMHR6VCGWTGG3ZUYS45DOWY43A","target":"record","payload":{"canonical_record":{"source":{"id":"1707.04175","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-13T15:24:20Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d496a9afd820c9d7a3996c5d5cce3b1cce15da9566200140bda2570facf6ec91","abstract_canon_sha256":"9232f8b0a8886166c52b1c9f27b5bf72ad31c44805770566173b2397ca3a02eb"},"schema_version":"1.0"},"canonical_sha256":"bb0f1f5446b4cc6de698973a375b1cd80ffd44982a8a6492254b7426249ce8b0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:40:20.335490Z","signature_b64":"fMkr3fxiHXHCAzr3h7jbGSdXLgQxnLKAaz1eFz8qgF7dJp+UV2ntH/ukJka2bIyldWeiCwbDciHM9ZA4Wf6/Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bb0f1f5446b4cc6de698973a375b1cd80ffd44982a8a6492254b7426249ce8b0","last_reissued_at":"2026-05-18T00:40:20.334520Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:40:20.334520Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.04175","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:20Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5qJUf1mi91tV/9zgQ7GmWOv7GHvOkgypm/GcSEPzHXuadAyyyk/0rypzK2WXsq8cuY/yYwbGj2N50q3tB8r6Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T12:35:15.752732Z"},"content_sha256":"1fd0b497fb5ec6e449170565ca08119cd5cf60aaf530ce47be67e9f15302b320","schema_version":"1.0","event_id":"sha256:1fd0b497fb5ec6e449170565ca08119cd5cf60aaf530ce47be67e9f15302b320"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:XMHR6VCGWTGG3ZUYS45DOWY43A","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Distral: Robust Multitask Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"James Kirkpatrick, John Quan, Nicolas Heess, Raia Hadsell, Razvan Pascanu, Victor Bapst, Wojciech Marian Czarnecki, Yee Whye Teh","submitted_at":"2017-07-13T15:24:20Z","abstract_excerpt":"Most deep reinforcement learning algorithms are data inefficient in complex and rich environments, limiting their applicability to many scenarios. One direction for improving data efficiency is multitask learning with shared neural network parameters, where efficiency may be improved through transfer across related tasks. In practice, however, this is not usually observed, because gradients from different tasks can interfere negatively, making learning unstable and sometimes even less data efficient. Another issue is the different reward schemes between tasks, which can easily lead to one task"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.04175","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:20Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JxhiQanY8aNhRoQJCShg5ftZm2H4+dDkgs4EA0hiRgYHXJYqK2X/5BGnBsIB4j9+rHmrvJo05/RNn6eak8N8Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T12:35:15.753082Z"},"content_sha256":"7b606b50badb687d89c67f245928e0718ac57ec85437e67ac9275f1db652dfd8","schema_version":"1.0","event_id":"sha256:7b606b50badb687d89c67f245928e0718ac57ec85437e67ac9275f1db652dfd8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XMHR6VCGWTGG3ZUYS45DOWY43A/bundle.json","state_url":"https://pith.science/pith/XMHR6VCGWTGG3ZUYS45DOWY43A/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XMHR6VCGWTGG3ZUYS45DOWY43A/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T12:35:15Z","links":{"resolver":"https://pith.science/pith/XMHR6VCGWTGG3ZUYS45DOWY43A","bundle":"https://pith.science/pith/XMHR6VCGWTGG3ZUYS45DOWY43A/bundle.json","state":"https://pith.science/pith/XMHR6VCGWTGG3ZUYS45DOWY43A/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XMHR6VCGWTGG3ZUYS45DOWY43A/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:XMHR6VCGWTGG3ZUYS45DOWY43A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9232f8b0a8886166c52b1c9f27b5bf72ad31c44805770566173b2397ca3a02eb","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-13T15:24:20Z","title_canon_sha256":"d496a9afd820c9d7a3996c5d5cce3b1cce15da9566200140bda2570facf6ec91"},"schema_version":"1.0","source":{"id":"1707.04175","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.04175","created_at":"2026-05-18T00:40:20Z"},{"alias_kind":"arxiv_version","alias_value":"1707.04175v1","created_at":"2026-05-18T00:40:20Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.04175","created_at":"2026-05-18T00:40:20Z"},{"alias_kind":"pith_short_12","alias_value":"XMHR6VCGWTGG","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_16","alias_value":"XMHR6VCGWTGG3ZUY","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_8","alias_value":"XMHR6VCG","created_at":"2026-05-18T12:31:56Z"}],"graph_snapshots":[{"event_id":"sha256:7b606b50badb687d89c67f245928e0718ac57ec85437e67ac9275f1db652dfd8","target":"graph","created_at":"2026-05-18T00:40:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Most deep reinforcement learning algorithms are data inefficient in complex and rich environments, limiting their applicability to many scenarios. One direction for improving data efficiency is multitask learning with shared neural network parameters, where efficiency may be improved through transfer across related tasks. In practice, however, this is not usually observed, because gradients from different tasks can interfere negatively, making learning unstable and sometimes even less data efficient. Another issue is the different reward schemes between tasks, which can easily lead to one task","authors_text":"James Kirkpatrick, John Quan, Nicolas Heess, Raia Hadsell, Razvan Pascanu, Victor Bapst, Wojciech Marian Czarnecki, Yee Whye Teh","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-13T15:24:20Z","title":"Distral: Robust Multitask Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.04175","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1fd0b497fb5ec6e449170565ca08119cd5cf60aaf530ce47be67e9f15302b320","target":"record","created_at":"2026-05-18T00:40:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9232f8b0a8886166c52b1c9f27b5bf72ad31c44805770566173b2397ca3a02eb","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-13T15:24:20Z","title_canon_sha256":"d496a9afd820c9d7a3996c5d5cce3b1cce15da9566200140bda2570facf6ec91"},"schema_version":"1.0","source":{"id":"1707.04175","kind":"arxiv","version":1}},"canonical_sha256":"bb0f1f5446b4cc6de698973a375b1cd80ffd44982a8a6492254b7426249ce8b0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bb0f1f5446b4cc6de698973a375b1cd80ffd44982a8a6492254b7426249ce8b0","first_computed_at":"2026-05-18T00:40:20.334520Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:40:20.334520Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fMkr3fxiHXHCAzr3h7jbGSdXLgQxnLKAaz1eFz8qgF7dJp+UV2ntH/ukJka2bIyldWeiCwbDciHM9ZA4Wf6/Bg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:40:20.335490Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.04175","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1fd0b497fb5ec6e449170565ca08119cd5cf60aaf530ce47be67e9f15302b320","sha256:7b606b50badb687d89c67f245928e0718ac57ec85437e67ac9275f1db652dfd8"],"state_sha256":"1f9d2d4fc8f8056b8d88cc5c6cfc034fdf20d54afda81002f89b9ca2134cb458"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"l5uKkHQvn2u4sUAcikTLH73rx9zLFK3y35mzBPw0mJI2Qu8aZpFuOFVWHaJZQHJvsHLdvGJBbkjPqFufPhK4Cg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T12:35:15.755555Z","bundle_sha256":"df0ba21527eb4bea16c2536cc677ee1b933f0b35aa35cc508cbb3f27e9696858"}}