{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:F2L4WA7QEUFKQL4Y7FIOOL64LV","short_pith_number":"pith:F2L4WA7Q","canonical_record":{"source":{"id":"1611.01796","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","cross_cats_sorted":["cs.NE"],"title_canon_sha256":"e311470b4507d3012883223df979951edf69af9022b636ef787657e107f6b34a","abstract_canon_sha256":"aef4ad2022621b95f34c4a32170c7a27420652bd1c4627d9c3bba055baabda14"},"schema_version":"1.0"},"canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","source":{"kind":"arxiv","id":"1611.01796","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.01796","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"arxiv_version","alias_value":"1611.01796v2","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.01796","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"pith_short_12","alias_value":"F2L4WA7QEUFK","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_16","alias_value":"F2L4WA7QEUFKQL4Y","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_8","alias_value":"F2L4WA7Q","created_at":"2026-05-18T12:30:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:F2L4WA7QEUFKQL4Y7FIOOL64LV","target":"record","payload":{"canonical_record":{"source":{"id":"1611.01796","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","cross_cats_sorted":["cs.NE"],"title_canon_sha256":"e311470b4507d3012883223df979951edf69af9022b636ef787657e107f6b34a","abstract_canon_sha256":"aef4ad2022621b95f34c4a32170c7a27420652bd1c4627d9c3bba055baabda14"},"schema_version":"1.0"},"canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:42:12.688031Z","signature_b64":"sPtLQrjPqf11GC2j5mjeQ0FQF7J4AwomNGQclXSiky3iNOYn7+icO2/2r5noj/XeBKMNu2l6QRYhPUo0CH6dCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","last_reissued_at":"2026-05-18T00:42:12.687561Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:42:12.687561Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1611.01796","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:42:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yvw9m9u0NZQ1x0pQmxb702NsXi5q8r0JRu0JEX182PPPUXMFjTkf/vU9VpbHQgm2KaES4gMHkfYvB4/09BhFCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T22:17:02.187724Z"},"content_sha256":"798251e70fe2a7f8a39939e3672bc6d250dfa75a8530ba45f9aba3d38fd5bc08","schema_version":"1.0","event_id":"sha256:798251e70fe2a7f8a39939e3672bc6d250dfa75a8530ba45f9aba3d38fd5bc08"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:F2L4WA7QEUFKQL4Y7FIOOL64LV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Modular Multitask Reinforcement Learning with Policy Sketches","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE"],"primary_cat":"cs.LG","authors_text":"Dan Klein, Jacob Andreas, Sergey Levine","submitted_at":"2016-11-06T15:36:56Z","abstract_excerpt":"We describe a framework for multitask deep reinforcement learning guided by policy sketches. Sketches annotate tasks with sequences of named subtasks, providing information about high-level structural relationships among tasks but not how to implement them---specifically not providing the detailed guidance used by much previous work on learning policy abstractions for RL (e.g. intermediate rewards, subtask completion signals, or intrinsic motivations). To learn from sketches, we present a model that associates every subtask with a modular subpolicy, and jointly maximizes reward over full task-"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.01796","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:42:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1hoUvJ8/hSP53Eu1aCz0G7hkqGIOr58FqIoHb+plg7c5AJFDkHPPfhVv+qhlykHosxcjb8V/Ew4rsfoH2oepBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T22:17:02.188409Z"},"content_sha256":"8a2156086d2803a4ae93a8d498df37e35c43486cd627dd974e0d15abcf66ceae","schema_version":"1.0","event_id":"sha256:8a2156086d2803a4ae93a8d498df37e35c43486cd627dd974e0d15abcf66ceae"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV/bundle.json","state_url":"https://pith.science/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T22:17:02Z","links":{"resolver":"https://pith.science/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV","bundle":"https://pith.science/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV/bundle.json","state":"https://pith.science/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/F2L4WA7QEUFKQL4Y7FIOOL64LV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:F2L4WA7QEUFKQL4Y7FIOOL64LV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"aef4ad2022621b95f34c4a32170c7a27420652bd1c4627d9c3bba055baabda14","cross_cats_sorted":["cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","title_canon_sha256":"e311470b4507d3012883223df979951edf69af9022b636ef787657e107f6b34a"},"schema_version":"1.0","source":{"id":"1611.01796","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.01796","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"arxiv_version","alias_value":"1611.01796v2","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.01796","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"pith_short_12","alias_value":"F2L4WA7QEUFK","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_16","alias_value":"F2L4WA7QEUFKQL4Y","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_8","alias_value":"F2L4WA7Q","created_at":"2026-05-18T12:30:15Z"}],"graph_snapshots":[{"event_id":"sha256:8a2156086d2803a4ae93a8d498df37e35c43486cd627dd974e0d15abcf66ceae","target":"graph","created_at":"2026-05-18T00:42:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We describe a framework for multitask deep reinforcement learning guided by policy sketches. Sketches annotate tasks with sequences of named subtasks, providing information about high-level structural relationships among tasks but not how to implement them---specifically not providing the detailed guidance used by much previous work on learning policy abstractions for RL (e.g. intermediate rewards, subtask completion signals, or intrinsic motivations). To learn from sketches, we present a model that associates every subtask with a modular subpolicy, and jointly maximizes reward over full task-","authors_text":"Dan Klein, Jacob Andreas, Sergey Levine","cross_cats":["cs.NE"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","title":"Modular Multitask Reinforcement Learning with Policy Sketches"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.01796","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:798251e70fe2a7f8a39939e3672bc6d250dfa75a8530ba45f9aba3d38fd5bc08","target":"record","created_at":"2026-05-18T00:42:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"aef4ad2022621b95f34c4a32170c7a27420652bd1c4627d9c3bba055baabda14","cross_cats_sorted":["cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","title_canon_sha256":"e311470b4507d3012883223df979951edf69af9022b636ef787657e107f6b34a"},"schema_version":"1.0","source":{"id":"1611.01796","kind":"arxiv","version":2}},"canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","first_computed_at":"2026-05-18T00:42:12.687561Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:42:12.687561Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sPtLQrjPqf11GC2j5mjeQ0FQF7J4AwomNGQclXSiky3iNOYn7+icO2/2r5noj/XeBKMNu2l6QRYhPUo0CH6dCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:42:12.688031Z","signed_message":"canonical_sha256_bytes"},"source_id":"1611.01796","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:798251e70fe2a7f8a39939e3672bc6d250dfa75a8530ba45f9aba3d38fd5bc08","sha256:8a2156086d2803a4ae93a8d498df37e35c43486cd627dd974e0d15abcf66ceae"],"state_sha256":"f7431e8c0ec91e6d295367b2629e6bea6b403b82a80c6cc1d547e7f76f198dee"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8LE3/7HMiA1yCk2jk2MeqI+rJrnEYOWCUYQ6EBY6ummm8iyHTH6ydaMmMDhDn3pB/aexLmF0ePwVJZP5FaVXBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T22:17:02.192081Z","bundle_sha256":"e55871f93c703066a0a3565da068b4dc389ef49e0a7b60e6cbf4309a85cf82ad"}}