{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:F2FQECM6UVM3MXQXMMUXU7IZQZ","short_pith_number":"pith:F2FQECM6","schema_version":"1.0","canonical_sha256":"2e8b02099ea559b65e1763297a7d1986460aa73a2ee2d55e18f5d1a9322c0fa0","source":{"kind":"arxiv","id":"1901.03559","version":2},"attestation_state":"computed","paper":{"title":"An investigation of model-free planning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Adam Santoro, Arthur Guez, David Raposo, David Silver, Greg Wayne, Karol Gregor, Laurent Orseau, Mehdi Mirza, Rishabh Kabra, S\\'ebastien Racani\\`ere, Th\\'eophane Weber, Timothy Lillicrap, Tom Eccles","submitted_at":"2019-01-11T11:42:51Z","abstract_excerpt":"The field of reinforcement learning (RL) is facing increasingly challenging domains with combinatorial complexity. For an RL agent to address these challenges, it is essential that it can plan effectively. Prior work has typically utilized an explicit model of the environment, combined with a specific planning algorithm (such as tree search). More recently, a new family of methods have been proposed that learn how to plan, by providing the structure for planning via an inductive bias in the function approximator (such as a tree structured neural network), trained end-to-end by a model-free RL "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.03559","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-11T11:42:51Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"9d6380f4d5398e25c0941b262596c966de2a81a3a8d92cf77c72321cbf334c59","abstract_canon_sha256":"22fd62daa0362e78ef7e09add4dd30d6ff4598347ca8e8c91f98ba8302c8f85c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:52.294625Z","signature_b64":"ZTMb4MeStTXNvRYw/Sd2TdSU8kYxXcmhfpMjHEBo8g6MhCrTYKV+9kpHl3VOQpBx1ytI9O5+LC2UNxRZWTI7Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e8b02099ea559b65e1763297a7d1986460aa73a2ee2d55e18f5d1a9322c0fa0","last_reissued_at":"2026-05-17T23:45:52.294178Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:52.294178Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"An investigation of model-free planning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Adam Santoro, Arthur Guez, David Raposo, David Silver, Greg Wayne, Karol Gregor, Laurent Orseau, Mehdi Mirza, Rishabh Kabra, S\\'ebastien Racani\\`ere, Th\\'eophane Weber, Timothy Lillicrap, Tom Eccles","submitted_at":"2019-01-11T11:42:51Z","abstract_excerpt":"The field of reinforcement learning (RL) is facing increasingly challenging domains with combinatorial complexity. For an RL agent to address these challenges, it is essential that it can plan effectively. Prior work has typically utilized an explicit model of the environment, combined with a specific planning algorithm (such as tree search). More recently, a new family of methods have been proposed that learn how to plan, by providing the structure for planning via an inductive bias in the function approximator (such as a tree structured neural network), trained end-to-end by a model-free RL "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.03559","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.03559","created_at":"2026-05-17T23:45:52.294244+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.03559v2","created_at":"2026-05-17T23:45:52.294244+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.03559","created_at":"2026-05-17T23:45:52.294244+00:00"},{"alias_kind":"pith_short_12","alias_value":"F2FQECM6UVM3","created_at":"2026-05-18T12:33:15.570797+00:00"},{"alias_kind":"pith_short_16","alias_value":"F2FQECM6UVM3MXQX","created_at":"2026-05-18T12:33:15.570797+00:00"},{"alias_kind":"pith_short_8","alias_value":"F2FQECM6","created_at":"2026-05-18T12:33:15.570797+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ","json":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ.json","graph_json":"https://pith.science/api/pith-number/F2FQECM6UVM3MXQXMMUXU7IZQZ/graph.json","events_json":"https://pith.science/api/pith-number/F2FQECM6UVM3MXQXMMUXU7IZQZ/events.json","paper":"https://pith.science/paper/F2FQECM6"},"agent_actions":{"view_html":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ","download_json":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ.json","view_paper":"https://pith.science/paper/F2FQECM6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.03559&json=true","fetch_graph":"https://pith.science/api/pith-number/F2FQECM6UVM3MXQXMMUXU7IZQZ/graph.json","fetch_events":"https://pith.science/api/pith-number/F2FQECM6UVM3MXQXMMUXU7IZQZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ/action/storage_attestation","attest_author":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ/action/author_attestation","sign_citation":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ/action/citation_signature","submit_replication":"https://pith.science/pith/F2FQECM6UVM3MXQXMMUXU7IZQZ/action/replication_record"}},"created_at":"2026-05-17T23:45:52.294244+00:00","updated_at":"2026-05-17T23:45:52.294244+00:00"}