{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ZPTHSSP4QHSUMMAASUMYWHEY7P","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bc35cb77f16d148d86ee8b965ea47111ee1ef6701fd789bc21df65f0406d1040","cross_cats_sorted":["cs.AI","cs.NE","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-16T10:05:31Z","title_canon_sha256":"269b28ce188d944418daaef3fd1a9f04d04b40613590fc7e2748e3c9a8128104"},"schema_version":"1.0","source":{"id":"1711.06006","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.06006","created_at":"2026-05-17T23:53:09Z"},{"alias_kind":"arxiv_version","alias_value":"1711.06006v3","created_at":"2026-05-17T23:53:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.06006","created_at":"2026-05-17T23:53:09Z"},{"alias_kind":"pith_short_12","alias_value":"ZPTHSSP4QHSU","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZPTHSSP4QHSUMMAA","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZPTHSSP4","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:f4d14819745c174d5ee70d63f42f824a2cbdfe158cf7f7233794dfc7b65152ea","target":"graph","created_at":"2026-05-17T23:53:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"A reinforcement learning agent that needs to pursue different goals across episodes requires a goal-conditional policy. In addition to their potential to generalize desirable behavior to unseen goals, such policies may also enable higher-level planning based on subgoals. In sparse-reward environments, the capacity to exploit information about the degree to which an arbitrary goal has been achieved while another goal was intended appears crucial to enable sample efficient learning. However, reinforcement learning agents have only recently been endowed with such capacity for hindsight. In this p","authors_text":"Avinash Ummadisingu, Filipe Mutz, Juergen Schmidhuber, Paulo Rauber","cross_cats":["cs.AI","cs.NE","cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-16T10:05:31Z","title":"Hindsight policy gradients"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.06006","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:457581cabc0122f2927a08349be460cc2784396545c7c47d5455738fbdad6452","target":"record","created_at":"2026-05-17T23:53:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bc35cb77f16d148d86ee8b965ea47111ee1ef6701fd789bc21df65f0406d1040","cross_cats_sorted":["cs.AI","cs.NE","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-16T10:05:31Z","title_canon_sha256":"269b28ce188d944418daaef3fd1a9f04d04b40613590fc7e2748e3c9a8128104"},"schema_version":"1.0","source":{"id":"1711.06006","kind":"arxiv","version":3}},"canonical_sha256":"cbe67949fc81e546300095198b1c98fbe527a8296f9f20eabd874a14a5399891","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cbe67949fc81e546300095198b1c98fbe527a8296f9f20eabd874a14a5399891","first_computed_at":"2026-05-17T23:53:09.811096Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:53:09.811096Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zGg3D+F3eeYBao0waRiLHpmOQGilbCplv41uQkHgMnI7xi2VUp6vpcLDkQBRQxdYYssOoJEjo4/VHikbmNimCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:53:09.811558Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.06006","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:457581cabc0122f2927a08349be460cc2784396545c7c47d5455738fbdad6452","sha256:f4d14819745c174d5ee70d63f42f824a2cbdfe158cf7f7233794dfc7b65152ea"],"state_sha256":"75b84d28c927948dae2d4788cb8dd2779cb94a3336231d2b261b31f17f50badf"}