{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:L5TAETQDXTUTUOEGF7646YFSAM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"193cf398f888abec330b72cdc7e98ce58ccf8b51a6c2cd4b19bc10a202ec2109","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-17T23:05:46Z","title_canon_sha256":"5e866500b0f926da6d83832eba478306d082d96371c8fddb62dfe18f236978d6"},"schema_version":"1.0","source":{"id":"1705.06366","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.06366","created_at":"2026-05-18T00:10:13Z"},{"alias_kind":"arxiv_version","alias_value":"1705.06366v5","created_at":"2026-05-18T00:10:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.06366","created_at":"2026-05-18T00:10:13Z"},{"alias_kind":"pith_short_12","alias_value":"L5TAETQDXTUT","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_16","alias_value":"L5TAETQDXTUTUOEG","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_8","alias_value":"L5TAETQD","created_at":"2026-05-18T12:31:28Z"}],"graph_snapshots":[{"event_id":"sha256:bfbab04c589c9cecb6c501e9aeaef40413a50e9422e0cad0d6304e7489c8c6dd","target":"graph","created_at":"2026-05-18T00:10:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning is a powerful technique to train an agent to perform a task. However, an agent that is trained using reinforcement learning is only capable of achieving the single task that is specified via its reward function. Such an approach does not scale well to settings in which an agent needs to perform a diverse set of tasks, such as navigating to varying positions in a room or moving objects to varying locations. Instead, we propose a method that allows an agent to automatically discover the range of tasks that it is capable of performing. We use a generator network to propose ","authors_text":"Carlos Florensa, David Held, Pieter Abbeel, Xinyang Geng","cross_cats":["cs.AI","cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-17T23:05:46Z","title":"Automatic Goal Generation for Reinforcement Learning Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.06366","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:90dd4df5dee5e4ce285009638a0b26c087d54124db334b613a3a9b37837a6591","target":"record","created_at":"2026-05-18T00:10:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"193cf398f888abec330b72cdc7e98ce58ccf8b51a6c2cd4b19bc10a202ec2109","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-17T23:05:46Z","title_canon_sha256":"5e866500b0f926da6d83832eba478306d082d96371c8fddb62dfe18f236978d6"},"schema_version":"1.0","source":{"id":"1705.06366","kind":"arxiv","version":5}},"canonical_sha256":"5f66024e03bce93a38862ffdcf60b2032d50bf63470ee346bb4f44dc29dbc6be","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5f66024e03bce93a38862ffdcf60b2032d50bf63470ee346bb4f44dc29dbc6be","first_computed_at":"2026-05-18T00:10:13.706258Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:10:13.706258Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LgyPeokAAbmzY3YlgbO2cktWuRmDDzOuN3C1vlwDSPpNEK4siOVUiVtoFNnjQPVhFq5ssPlOhpkw9ii8MbFXBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:10:13.706856Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.06366","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:90dd4df5dee5e4ce285009638a0b26c087d54124db334b613a3a9b37837a6591","sha256:bfbab04c589c9cecb6c501e9aeaef40413a50e9422e0cad0d6304e7489c8c6dd"],"state_sha256":"d7b5d812c1324ecf62fb02bea2cdb6d0122c0f59e1e4691430fe52f958a62ecb"}