{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:O7LW4JMU4SULLTSUPFBCAG4MUV","short_pith_number":"pith:O7LW4JMU","schema_version":"1.0","canonical_sha256":"77d76e2594e4a8b5ce547942201b8ca550ec2ff2bef1fbfd6ae599257f7bf19c","source":{"kind":"arxiv","id":"1802.07245","version":1},"attestation_state":"computed","paper":{"title":"Meta-Reinforcement Learning of Structured Exploration Strategies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.NE"],"primary_cat":"cs.LG","authors_text":"Abhishek Gupta, Pieter Abbeel, Russell Mendonca, Sergey Levine, Yuxuan Liu","submitted_at":"2018-02-20T18:40:57Z","abstract_excerpt":"Exploration is a fundamental challenge in reinforcement learning (RL). Many of the current exploration methods for deep RL use task-agnostic objectives, such as information gain or bonuses based on state visitation. However, many practical applications of RL involve learning more than a single task, and prior tasks can be used to inform how exploration should be performed in new tasks. In this work, we explore how prior tasks can inform an agent about how to explore effectively in new situations. We introduce a novel gradient-based fast adaptation algorithm -- model agnostic exploration with s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.07245","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-02-20T18:40:57Z","cross_cats_sorted":["cs.AI","cs.NE"],"title_canon_sha256":"e4657d6b71435b47c9a8ab90862f428f69fbfec11d365866cb6c725a00894ce6","abstract_canon_sha256":"ad1f0988e7671ba7ab88ea241cf18532fd42ad70cd89f3e2058cc14198f7b62f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:53.209938Z","signature_b64":"iWyADwtkzrJk7iBBonp3V9Z4VA1BWgrxkCaVKSuh8saYK/MTvCl4twaceiJS3jmo7gdWfN9DqTOY9hCFIuFvAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"77d76e2594e4a8b5ce547942201b8ca550ec2ff2bef1fbfd6ae599257f7bf19c","last_reissued_at":"2026-05-18T00:22:53.209458Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:53.209458Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Meta-Reinforcement Learning of Structured Exploration Strategies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.NE"],"primary_cat":"cs.LG","authors_text":"Abhishek Gupta, Pieter Abbeel, Russell Mendonca, Sergey Levine, Yuxuan Liu","submitted_at":"2018-02-20T18:40:57Z","abstract_excerpt":"Exploration is a fundamental challenge in reinforcement learning (RL). Many of the current exploration methods for deep RL use task-agnostic objectives, such as information gain or bonuses based on state visitation. However, many practical applications of RL involve learning more than a single task, and prior tasks can be used to inform how exploration should be performed in new tasks. In this work, we explore how prior tasks can inform an agent about how to explore effectively in new situations. We introduce a novel gradient-based fast adaptation algorithm -- model agnostic exploration with s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.07245","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.07245","created_at":"2026-05-18T00:22:53.209522+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.07245v1","created_at":"2026-05-18T00:22:53.209522+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.07245","created_at":"2026-05-18T00:22:53.209522+00:00"},{"alias_kind":"pith_short_12","alias_value":"O7LW4JMU4SUL","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"O7LW4JMU4SULLTSU","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"O7LW4JMU","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1906.09223","citing_title":"Disentangled Skill Embeddings for Reinforcement Learning","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20189","citing_title":"SOLAR: A Self-Optimizing Open-Ended Autonomous Agent for Lifelong Learning and Continual Adaptation","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"1805.00909","citing_title":"Reinforcement Learning and Control as Probabilistic Inference: Tutorial and Review","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02300","citing_title":"A Meta Reinforcement Learning Approach to Goals-Based Wealth Management","ref_index":85,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV","json":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV.json","graph_json":"https://pith.science/api/pith-number/O7LW4JMU4SULLTSUPFBCAG4MUV/graph.json","events_json":"https://pith.science/api/pith-number/O7LW4JMU4SULLTSUPFBCAG4MUV/events.json","paper":"https://pith.science/paper/O7LW4JMU"},"agent_actions":{"view_html":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV","download_json":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV.json","view_paper":"https://pith.science/paper/O7LW4JMU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.07245&json=true","fetch_graph":"https://pith.science/api/pith-number/O7LW4JMU4SULLTSUPFBCAG4MUV/graph.json","fetch_events":"https://pith.science/api/pith-number/O7LW4JMU4SULLTSUPFBCAG4MUV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV/action/storage_attestation","attest_author":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV/action/author_attestation","sign_citation":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV/action/citation_signature","submit_replication":"https://pith.science/pith/O7LW4JMU4SULLTSUPFBCAG4MUV/action/replication_record"}},"created_at":"2026-05-18T00:22:53.209522+00:00","updated_at":"2026-05-18T00:22:53.209522+00:00"}