{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:OFCZ32NAQV7Z576DPQLT5ZQNL4","short_pith_number":"pith:OFCZ32NA","schema_version":"1.0","canonical_sha256":"71459de9a0857f9effc37c173ee60d5f1b8678edf2e4ae3a278a68f6eeaa2327","source":{"kind":"arxiv","id":"1806.05898","version":1},"attestation_state":"computed","paper":{"title":"Improving width-based planning with compact policies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anders Jonsson, Miquel Junyent, Vicen\\c{c} G\\'omez","submitted_at":"2018-06-15T10:41:23Z","abstract_excerpt":"Optimal action selection in decision problems characterized by sparse, delayed rewards is still an open challenge. For these problems, current deep reinforcement learning methods require enormous amounts of data to learn controllers that reach human-level performance. In this work, we propose a method that interleaves planning and learning to address this issue. The planning step hinges on the Iterated-Width (IW) planner, a state of the art planner that makes explicit use of the state representation to perform structured exploration. IW is able to scale up to problems independently of the size"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1806.05898","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-06-15T10:41:23Z","cross_cats_sorted":[],"title_canon_sha256":"d534f9d5ac20b910190d2d1b30394e27786df8470637f7fd30a00d5629868302","abstract_canon_sha256":"376bcae4502712e3ec622456dc6e380535049c34a1f7c347c96ecd7a85faaa07"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:09.118786Z","signature_b64":"VSKNhPDAbCKLMTbLuyNTLWIwWQbA6eSdve80YHn5DF5L1Kxrof9FNX96/rTVn5jKm32Qu72ERy0SqRRhZ9dmAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"71459de9a0857f9effc37c173ee60d5f1b8678edf2e4ae3a278a68f6eeaa2327","last_reissued_at":"2026-05-18T00:13:09.118200Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:09.118200Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Improving width-based planning with compact policies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anders Jonsson, Miquel Junyent, Vicen\\c{c} G\\'omez","submitted_at":"2018-06-15T10:41:23Z","abstract_excerpt":"Optimal action selection in decision problems characterized by sparse, delayed rewards is still an open challenge. For these problems, current deep reinforcement learning methods require enormous amounts of data to learn controllers that reach human-level performance. In this work, we propose a method that interleaves planning and learning to address this issue. The planning step hinges on the Iterated-Width (IW) planner, a state of the art planner that makes explicit use of the state representation to perform structured exploration. IW is able to scale up to problems independently of the size"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.05898","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.05898","created_at":"2026-05-18T00:13:09.118291+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.05898v1","created_at":"2026-05-18T00:13:09.118291+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.05898","created_at":"2026-05-18T00:13:09.118291+00:00"},{"alias_kind":"pith_short_12","alias_value":"OFCZ32NAQV7Z","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"OFCZ32NAQV7Z576D","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"OFCZ32NA","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4","json":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4.json","graph_json":"https://pith.science/api/pith-number/OFCZ32NAQV7Z576DPQLT5ZQNL4/graph.json","events_json":"https://pith.science/api/pith-number/OFCZ32NAQV7Z576DPQLT5ZQNL4/events.json","paper":"https://pith.science/paper/OFCZ32NA"},"agent_actions":{"view_html":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4","download_json":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4.json","view_paper":"https://pith.science/paper/OFCZ32NA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.05898&json=true","fetch_graph":"https://pith.science/api/pith-number/OFCZ32NAQV7Z576DPQLT5ZQNL4/graph.json","fetch_events":"https://pith.science/api/pith-number/OFCZ32NAQV7Z576DPQLT5ZQNL4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4/action/storage_attestation","attest_author":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4/action/author_attestation","sign_citation":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4/action/citation_signature","submit_replication":"https://pith.science/pith/OFCZ32NAQV7Z576DPQLT5ZQNL4/action/replication_record"}},"created_at":"2026-05-18T00:13:09.118291+00:00","updated_at":"2026-05-18T00:13:09.118291+00:00"}