{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:LSJVVM2BAMDFOBIWTNOYPSLXLT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"beb8f9be761a911387cf528326b4148e2b88c1e801988656d9b30a154237fe95","cross_cats_sorted":["cs.AI","cs.NE","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-08T20:16:16Z","title_canon_sha256":"2a2e9ea613d8670dfef47d18bc9f0969fa15b14c4bf94d77e99b240d0bf01e68"},"schema_version":"1.0","source":{"id":"1812.03381","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.03381","created_at":"2026-05-17T23:58:46Z"},{"alias_kind":"arxiv_version","alias_value":"1812.03381v1","created_at":"2026-05-17T23:58:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.03381","created_at":"2026-05-17T23:58:46Z"},{"alias_kind":"pith_short_12","alias_value":"LSJVVM2BAMDF","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LSJVVM2BAMDFOBIW","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LSJVVM2B","created_at":"2026-05-18T12:32:37Z"}],"graph_snapshots":[{"event_id":"sha256:537f8a230e863ed87c7a3b42130f0a2bf9fa66609c5accb5deb9eff62db893e1","target":"graph","created_at":"2026-05-17T23:58:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose a new method for learning from a single demonstration to solve hard exploration tasks like the Atari game Montezuma's Revenge. Instead of imitating human demonstrations, as proposed in other recent works, our approach is to maximize rewards directly. Our agent is trained using off-the-shelf reinforcement learning, but starts every episode by resetting to a state from a demonstration. By starting from such demonstration states, the agent requires much less exploration to learn a game compared to when it starts from the beginning of the game at every episode. We analyze reinforcement ","authors_text":"Richard Chen, Tim Salimans","cross_cats":["cs.AI","cs.NE","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-08T20:16:16Z","title":"Learning Montezuma's Revenge from a Single Demonstration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.03381","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c3775dbc4d45363d878da70116757176c761d594cc5e2fd34e28acd4124d4b73","target":"record","created_at":"2026-05-17T23:58:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"beb8f9be761a911387cf528326b4148e2b88c1e801988656d9b30a154237fe95","cross_cats_sorted":["cs.AI","cs.NE","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-08T20:16:16Z","title_canon_sha256":"2a2e9ea613d8670dfef47d18bc9f0969fa15b14c4bf94d77e99b240d0bf01e68"},"schema_version":"1.0","source":{"id":"1812.03381","kind":"arxiv","version":1}},"canonical_sha256":"5c935ab34103065705169b5d87c9775ce675c754b8c14eff351c49c9330b288a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5c935ab34103065705169b5d87c9775ce675c754b8c14eff351c49c9330b288a","first_computed_at":"2026-05-17T23:58:46.402195Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:58:46.402195Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DLzovSXbF+f9BavzOTnmUZPJ2Oa8bfc17oB546rs570CBF/Eub474OrXeeZvcqOe9YT7oR+8dr5dWZ0Fy0sbBQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:58:46.402827Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.03381","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c3775dbc4d45363d878da70116757176c761d594cc5e2fd34e28acd4124d4b73","sha256:537f8a230e863ed87c7a3b42130f0a2bf9fa66609c5accb5deb9eff62db893e1"],"state_sha256":"9dc8f69e9b570d0395f568e9ab9012373e2e09817e6d60c7ae0bcc8a287118df"}