{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:F2L4WA7QEUFKQL4Y7FIOOL64LV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"aef4ad2022621b95f34c4a32170c7a27420652bd1c4627d9c3bba055baabda14","cross_cats_sorted":["cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","title_canon_sha256":"e311470b4507d3012883223df979951edf69af9022b636ef787657e107f6b34a"},"schema_version":"1.0","source":{"id":"1611.01796","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.01796","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"arxiv_version","alias_value":"1611.01796v2","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.01796","created_at":"2026-05-18T00:42:12Z"},{"alias_kind":"pith_short_12","alias_value":"F2L4WA7QEUFK","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_16","alias_value":"F2L4WA7QEUFKQL4Y","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_8","alias_value":"F2L4WA7Q","created_at":"2026-05-18T12:30:15Z"}],"graph_snapshots":[{"event_id":"sha256:8a2156086d2803a4ae93a8d498df37e35c43486cd627dd974e0d15abcf66ceae","target":"graph","created_at":"2026-05-18T00:42:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We describe a framework for multitask deep reinforcement learning guided by policy sketches. Sketches annotate tasks with sequences of named subtasks, providing information about high-level structural relationships among tasks but not how to implement them---specifically not providing the detailed guidance used by much previous work on learning policy abstractions for RL (e.g. intermediate rewards, subtask completion signals, or intrinsic motivations). To learn from sketches, we present a model that associates every subtask with a modular subpolicy, and jointly maximizes reward over full task-","authors_text":"Dan Klein, Jacob Andreas, Sergey Levine","cross_cats":["cs.NE"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","title":"Modular Multitask Reinforcement Learning with Policy Sketches"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.01796","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:798251e70fe2a7f8a39939e3672bc6d250dfa75a8530ba45f9aba3d38fd5bc08","target":"record","created_at":"2026-05-18T00:42:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"aef4ad2022621b95f34c4a32170c7a27420652bd1c4627d9c3bba055baabda14","cross_cats_sorted":["cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-06T15:36:56Z","title_canon_sha256":"e311470b4507d3012883223df979951edf69af9022b636ef787657e107f6b34a"},"schema_version":"1.0","source":{"id":"1611.01796","kind":"arxiv","version":2}},"canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2e97cb03f0250aa82f98f950e72fdc5d581badd95a62f9317dd9a9adeb867872","first_computed_at":"2026-05-18T00:42:12.687561Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:42:12.687561Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sPtLQrjPqf11GC2j5mjeQ0FQF7J4AwomNGQclXSiky3iNOYn7+icO2/2r5noj/XeBKMNu2l6QRYhPUo0CH6dCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:42:12.688031Z","signed_message":"canonical_sha256_bytes"},"source_id":"1611.01796","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:798251e70fe2a7f8a39939e3672bc6d250dfa75a8530ba45f9aba3d38fd5bc08","sha256:8a2156086d2803a4ae93a8d498df37e35c43486cd627dd974e0d15abcf66ceae"],"state_sha256":"f7431e8c0ec91e6d295367b2629e6bea6b403b82a80c6cc1d547e7f76f198dee"}