{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ARC2YQRYPS2PF2SH5QHSZCWITE","short_pith_number":"pith:ARC2YQRY","schema_version":"1.0","canonical_sha256":"0445ac42387cb4f2ea47ec0f2c8ac89900fb47a7c935c9ada0f0ab785505e955","source":{"kind":"arxiv","id":"1807.04742","version":2},"attestation_state":"computed","paper":{"title":"Visual Reinforcement Learning with Imagined Goals","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ashvin Nair, Murtaza Dalal, Sergey Levine, Shikhar Bahl, Steven Lin, Vitchyr Pong","submitted_at":"2018-07-12T17:51:16Z","abstract_excerpt":"For an autonomous agent to fulfill a wide range of user-specified goals at test time, it must be able to learn broadly applicable and general-purpose skill repertoires. Furthermore, to provide the requisite level of generality, these skills must handle raw sensory input such as images. In this paper, we propose an algorithm that acquires such general-purpose skills by combining unsupervised representation learning and reinforcement learning of goal-conditioned policies. Since the particular goals that might be required at test-time are not known in advance, the agent performs a self-supervised"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1807.04742","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-07-12T17:51:16Z","cross_cats_sorted":["cs.CV","cs.RO","stat.ML"],"title_canon_sha256":"dc56d94c4334e95b2f2ba5db058048fcbdfe6a63e650884fc1378ae23b0e557a","abstract_canon_sha256":"f369cf5237e888ac876267a032adac7c8bb1911548ef036267a043adbc009f1c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:16.705731Z","signature_b64":"TY9s4HlHRBD621qylGW+34wqzJ0JKT6wfH8qCanhm1jzM0YeTVCBD/JPhOV6lfw6mCxLvqt20+x9CkCb+aMtDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0445ac42387cb4f2ea47ec0f2c8ac89900fb47a7c935c9ada0f0ab785505e955","last_reissued_at":"2026-05-17T23:59:16.705092Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:16.705092Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Visual Reinforcement Learning with Imagined Goals","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ashvin Nair, Murtaza Dalal, Sergey Levine, Shikhar Bahl, Steven Lin, Vitchyr Pong","submitted_at":"2018-07-12T17:51:16Z","abstract_excerpt":"For an autonomous agent to fulfill a wide range of user-specified goals at test time, it must be able to learn broadly applicable and general-purpose skill repertoires. Furthermore, to provide the requisite level of generality, these skills must handle raw sensory input such as images. In this paper, we propose an algorithm that acquires such general-purpose skills by combining unsupervised representation learning and reinforcement learning of goal-conditioned policies. Since the particular goals that might be required at test-time are not known in advance, the agent performs a self-supervised"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.04742","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1807.04742","created_at":"2026-05-17T23:59:16.705189+00:00"},{"alias_kind":"arxiv_version","alias_value":"1807.04742v2","created_at":"2026-05-17T23:59:16.705189+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.04742","created_at":"2026-05-17T23:59:16.705189+00:00"},{"alias_kind":"pith_short_12","alias_value":"ARC2YQRYPS2P","created_at":"2026-05-18T12:32:13.499390+00:00"},{"alias_kind":"pith_short_16","alias_value":"ARC2YQRYPS2PF2SH","created_at":"2026-05-18T12:32:13.499390+00:00"},{"alias_kind":"pith_short_8","alias_value":"ARC2YQRY","created_at":"2026-05-18T12:32:13.499390+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.12651","citing_title":"Runtime Monitoring of Perception-Based Autonomous Systems via Embedding Temporal Logic","ref_index":138,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12651","citing_title":"Runtime Monitoring of Perception-Based Autonomous Systems via Embedding Temporal Logic","ref_index":138,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01242","citing_title":"Breaking the Computational Barrier: Provably Efficient Actor-Critic for Low-Rank MDPs","ref_index":58,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE","json":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE.json","graph_json":"https://pith.science/api/pith-number/ARC2YQRYPS2PF2SH5QHSZCWITE/graph.json","events_json":"https://pith.science/api/pith-number/ARC2YQRYPS2PF2SH5QHSZCWITE/events.json","paper":"https://pith.science/paper/ARC2YQRY"},"agent_actions":{"view_html":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE","download_json":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE.json","view_paper":"https://pith.science/paper/ARC2YQRY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1807.04742&json=true","fetch_graph":"https://pith.science/api/pith-number/ARC2YQRYPS2PF2SH5QHSZCWITE/graph.json","fetch_events":"https://pith.science/api/pith-number/ARC2YQRYPS2PF2SH5QHSZCWITE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE/action/storage_attestation","attest_author":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE/action/author_attestation","sign_citation":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE/action/citation_signature","submit_replication":"https://pith.science/pith/ARC2YQRYPS2PF2SH5QHSZCWITE/action/replication_record"}},"created_at":"2026-05-17T23:59:16.705189+00:00","updated_at":"2026-05-17T23:59:16.705189+00:00"}