{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:RGV6GGTR5VZXHVHJ44K6WX5RAH","short_pith_number":"pith:RGV6GGTR","schema_version":"1.0","canonical_sha256":"89abe31a71ed7373d4e9e715eb5fb101dbef6656146d1c01c8810fd211c54806","source":{"kind":"arxiv","id":"1805.10413","version":1},"attestation_state":"computed","paper":{"title":"Fast Policy Learning through Imitation and Reinforcement","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Byron Boots, Ching-An Cheng, Nolan Wagener, Xinyan Yan","submitted_at":"2018-05-26T02:18:01Z","abstract_excerpt":"Imitation learning (IL) consists of a set of tools that leverage expert demonstrations to quickly learn policies. However, if the expert is suboptimal, IL can yield policies with inferior performance compared to reinforcement learning (RL). In this paper, we aim to provide an algorithm that combines the best aspects of RL and IL. We accomplish this by formulating several popular RL and IL algorithms in a common mirror descent framework, showing that these algorithms can be viewed as a variation on a single approach. We then propose LOKI, a strategy for policy learning that first performs a sma"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1805.10413","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-26T02:18:01Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"88ccbeaf5c930dfb3913c82a110ded64fcc5831afc6d764f965606bd9adcaadf","abstract_canon_sha256":"5116c06369804a10314e1eae5e52daf804590b56e489ed98260646ced349b4d8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:14:52.587417Z","signature_b64":"PSLiZImeBzPEtbqReU2c+F962d1AKqp8P/iVCXg9EXucbEcoq0CvtNcIEcPoUtiXDC25Kr9omYyyoLa9Lir7CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"89abe31a71ed7373d4e9e715eb5fb101dbef6656146d1c01c8810fd211c54806","last_reissued_at":"2026-05-18T00:14:52.586721Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:14:52.586721Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Fast Policy Learning through Imitation and Reinforcement","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Byron Boots, Ching-An Cheng, Nolan Wagener, Xinyan Yan","submitted_at":"2018-05-26T02:18:01Z","abstract_excerpt":"Imitation learning (IL) consists of a set of tools that leverage expert demonstrations to quickly learn policies. However, if the expert is suboptimal, IL can yield policies with inferior performance compared to reinforcement learning (RL). In this paper, we aim to provide an algorithm that combines the best aspects of RL and IL. We accomplish this by formulating several popular RL and IL algorithms in a common mirror descent framework, showing that these algorithms can be viewed as a variation on a single approach. We then propose LOKI, a strategy for policy learning that first performs a sma"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.10413","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1805.10413","created_at":"2026-05-18T00:14:52.586816+00:00"},{"alias_kind":"arxiv_version","alias_value":"1805.10413v1","created_at":"2026-05-18T00:14:52.586816+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.10413","created_at":"2026-05-18T00:14:52.586816+00:00"},{"alias_kind":"pith_short_12","alias_value":"RGV6GGTR5VZX","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_16","alias_value":"RGV6GGTR5VZXHVHJ","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_8","alias_value":"RGV6GGTR","created_at":"2026-05-18T12:32:50.500415+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1907.07238","citing_title":"Leveraging Experience in Lazy Search","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2603.13842","citing_title":"Fine-tuning is Not Enough: A Parallel Framework for Collaborative Imitation and Reinforcement Learning in End-to-end Autonomous Driving","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH","json":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH.json","graph_json":"https://pith.science/api/pith-number/RGV6GGTR5VZXHVHJ44K6WX5RAH/graph.json","events_json":"https://pith.science/api/pith-number/RGV6GGTR5VZXHVHJ44K6WX5RAH/events.json","paper":"https://pith.science/paper/RGV6GGTR"},"agent_actions":{"view_html":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH","download_json":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH.json","view_paper":"https://pith.science/paper/RGV6GGTR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1805.10413&json=true","fetch_graph":"https://pith.science/api/pith-number/RGV6GGTR5VZXHVHJ44K6WX5RAH/graph.json","fetch_events":"https://pith.science/api/pith-number/RGV6GGTR5VZXHVHJ44K6WX5RAH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH/action/storage_attestation","attest_author":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH/action/author_attestation","sign_citation":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH/action/citation_signature","submit_replication":"https://pith.science/pith/RGV6GGTR5VZXHVHJ44K6WX5RAH/action/replication_record"}},"created_at":"2026-05-18T00:14:52.586816+00:00","updated_at":"2026-05-18T00:14:52.586816+00:00"}