{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:4K6YH27CMTCG3II3LZTGSZXWJ5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c3f68c6ae1f6eb3cb6f1d1808ed3c71a852c5fe3714142004a4b51a440267eff","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-05-24T23:19:44Z","title_canon_sha256":"60fa8d53b90ed10dbb04b3da7aa48a288d1e49f9d6ddc778ede3fed4487dbaf2"},"schema_version":"1.0","source":{"id":"1705.08997","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.08997","created_at":"2026-05-18T00:43:40Z"},{"alias_kind":"arxiv_version","alias_value":"1705.08997v1","created_at":"2026-05-18T00:43:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.08997","created_at":"2026-05-18T00:43:40Z"},{"alias_kind":"pith_short_12","alias_value":"4K6YH27CMTCG","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"4K6YH27CMTCG3II3","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"4K6YH27C","created_at":"2026-05-18T12:31:00Z"}],"graph_snapshots":[{"event_id":"sha256:fdac3c59083e749787674cfd80abbabc5a2a2cfb5de098597520d864809895e0","target":"graph","created_at":"2026-05-18T00:43:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Typical reinforcement learning (RL) agents learn to complete tasks specified by reward functions tailored to their domain. As such, the policies they learn do not generalize even to similar domains. To address this issue, we develop a framework through which a deep RL agent learns to generalize policies from smaller, simpler domains to more complex ones using a recurrent attention mechanism. The task is presented to the agent as an image and an instruction specifying the goal. This meta-controller guides the agent towards its goal by designing a sequence of smaller subtasks on the part of the ","authors_text":"Charles Isbell, Farhan Tejani, Himanshu Sahni, Saurabh Kumar, Yannick Schroecker","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-05-24T23:19:44Z","title":"State Space Decomposition and Subgoal Creation for Transfer in Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.08997","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:299bbca00ee19ac7e61e78e85376e6155e32038c9023bc1e40ce1d3f86b77510","target":"record","created_at":"2026-05-18T00:43:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c3f68c6ae1f6eb3cb6f1d1808ed3c71a852c5fe3714142004a4b51a440267eff","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-05-24T23:19:44Z","title_canon_sha256":"60fa8d53b90ed10dbb04b3da7aa48a288d1e49f9d6ddc778ede3fed4487dbaf2"},"schema_version":"1.0","source":{"id":"1705.08997","kind":"arxiv","version":1}},"canonical_sha256":"e2bd83ebe264c46da11b5e666966f64f68528c0948106eb17ff9304edf64d152","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e2bd83ebe264c46da11b5e666966f64f68528c0948106eb17ff9304edf64d152","first_computed_at":"2026-05-18T00:43:40.708384Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:43:40.708384Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lf/HrI9WiahTGVtx9zL2I65hMYmejyT8XCn03cqV0zDCLkaWRtpiC3C+p1tqv4Zdfm0QIXpax0m+Jc+9ulupBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:43:40.708891Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.08997","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:299bbca00ee19ac7e61e78e85376e6155e32038c9023bc1e40ce1d3f86b77510","sha256:fdac3c59083e749787674cfd80abbabc5a2a2cfb5de098597520d864809895e0"],"state_sha256":"217afb73616f4e989f873be9f467af46451032dde8431a22b5ea45f551162238"}