{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:42DRJS37ZSFYWSZIDPXWHOEZWL","short_pith_number":"pith:42DRJS37","schema_version":"1.0","canonical_sha256":"e68714cb7fcc8b8b4b281bef63b899b2cf3f78412f7012f0500a7166145e7f26","source":{"kind":"arxiv","id":"1707.05300","version":3},"attestation_state":"computed","paper":{"title":"Reverse Curriculum Generation for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE","cs.RO"],"primary_cat":"cs.AI","authors_text":"Carlos Florensa, David Held, Markus Wulfmeier, Michael Zhang, Pieter Abbeel","submitted_at":"2017-07-17T17:53:54Z","abstract_excerpt":"Many relevant tasks require an agent to reach a certain state, or to manipulate objects into a desired configuration. For example, we might want a robot to align and assemble a gear onto an axle or insert and turn a key in a lock. These goal-oriented tasks present a considerable challenge for reinforcement learning, since their natural reward function is sparse and prohibitive amounts of exploration are required to reach the goal and receive some learning signal. Past approaches tackle these problems by exploiting expert demonstrations or by manually designing a task-specific reward shaping fu"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.05300","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-17T17:53:54Z","cross_cats_sorted":["cs.LG","cs.NE","cs.RO"],"title_canon_sha256":"28b304a97b81a4ea07e1ee26ce60031eb72575f5d4d6dbdd7ca05828e05f1ee5","abstract_canon_sha256":"6238bc3b633b8dc8c89eddad75f88b7fc9abe65cf104c20d1bd32e3bf2fb6d73"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:10:13.554393Z","signature_b64":"1t22g+MAbhndd6UaRLsEhNOlzIY0XwSZ36A7PNeR5VlrnVawVRI9Ny4xc75UzM8eUxSjohJdUe2177OBme+jBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e68714cb7fcc8b8b4b281bef63b899b2cf3f78412f7012f0500a7166145e7f26","last_reissued_at":"2026-05-18T00:10:13.553822Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:10:13.553822Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Reverse Curriculum Generation for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE","cs.RO"],"primary_cat":"cs.AI","authors_text":"Carlos Florensa, David Held, Markus Wulfmeier, Michael Zhang, Pieter Abbeel","submitted_at":"2017-07-17T17:53:54Z","abstract_excerpt":"Many relevant tasks require an agent to reach a certain state, or to manipulate objects into a desired configuration. For example, we might want a robot to align and assemble a gear onto an axle or insert and turn a key in a lock. These goal-oriented tasks present a considerable challenge for reinforcement learning, since their natural reward function is sparse and prohibitive amounts of exploration are required to reach the goal and receive some learning signal. Past approaches tackle these problems by exploiting expert demonstrations or by manually designing a task-specific reward shaping fu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.05300","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.05300","created_at":"2026-05-18T00:10:13.553906+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.05300v3","created_at":"2026-05-18T00:10:13.553906+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.05300","created_at":"2026-05-18T00:10:13.553906+00:00"},{"alias_kind":"pith_short_12","alias_value":"42DRJS37ZSFY","created_at":"2026-05-18T12:30:58.224056+00:00"},{"alias_kind":"pith_short_16","alias_value":"42DRJS37ZSFYWSZI","created_at":"2026-05-18T12:30:58.224056+00:00"},{"alias_kind":"pith_short_8","alias_value":"42DRJS37","created_at":"2026-05-18T12:30:58.224056+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.12266","citing_title":"Growing Action Spaces","ref_index":3,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL","json":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL.json","graph_json":"https://pith.science/api/pith-number/42DRJS37ZSFYWSZIDPXWHOEZWL/graph.json","events_json":"https://pith.science/api/pith-number/42DRJS37ZSFYWSZIDPXWHOEZWL/events.json","paper":"https://pith.science/paper/42DRJS37"},"agent_actions":{"view_html":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL","download_json":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL.json","view_paper":"https://pith.science/paper/42DRJS37","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.05300&json=true","fetch_graph":"https://pith.science/api/pith-number/42DRJS37ZSFYWSZIDPXWHOEZWL/graph.json","fetch_events":"https://pith.science/api/pith-number/42DRJS37ZSFYWSZIDPXWHOEZWL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL/action/storage_attestation","attest_author":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL/action/author_attestation","sign_citation":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL/action/citation_signature","submit_replication":"https://pith.science/pith/42DRJS37ZSFYWSZIDPXWHOEZWL/action/replication_record"}},"created_at":"2026-05-18T00:10:13.553906+00:00","updated_at":"2026-05-18T00:10:13.553906+00:00"}