{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:2FODTPQPQWGVBOPJFNHT7G6XAM","short_pith_number":"pith:2FODTPQP","schema_version":"1.0","canonical_sha256":"d15c39be0f858d50b9e92b4f3f9bd70300cda031e6ed36483944b7d143fa938f","source":{"kind":"arxiv","id":"1808.09819","version":2},"attestation_state":"computed","paper":{"title":"Approximate Exploration through State Abstraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aaron Courville, Adrien Ali Ta\\\"iga, Marc G. Bellemare","submitted_at":"2018-08-29T13:41:33Z","abstract_excerpt":"Although exploration in reinforcement learning is well understood from a theoretical point of view, provably correct methods remain impractical. In this paper we study the interplay between exploration and approximation, what we call approximate exploration. Our main goal is to further our theoretical understanding of pseudo-count based exploration bonuses (Bellemare et al., 2016), a practical exploration scheme based on density modelling. As a warm-up, we quantify the performance of an exploration algorithm, MBIE-EB (Strehl and Littman, 2008), when explicitly combined with state aggregation. "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1808.09819","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-29T13:41:33Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"a9b724ef3823abc8c3d0d4686665705e53e02c783124f946c8f3c11985fd5f5d","abstract_canon_sha256":"a0bbb6d697b752783d856c34f896df1332337c6d68ad94fedf54980feeb33318"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:38.401885Z","signature_b64":"0HGlom79LI9jyd10MIO/WCeTqfhKp6rHh6ys/kd1BDfyEqNIkTjYQhrqdd2S8ESdwq9EhM+3Vk1u8y0+q4EaDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d15c39be0f858d50b9e92b4f3f9bd70300cda031e6ed36483944b7d143fa938f","last_reissued_at":"2026-05-17T23:55:38.401338Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:38.401338Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Approximate Exploration through State Abstraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aaron Courville, Adrien Ali Ta\\\"iga, Marc G. Bellemare","submitted_at":"2018-08-29T13:41:33Z","abstract_excerpt":"Although exploration in reinforcement learning is well understood from a theoretical point of view, provably correct methods remain impractical. In this paper we study the interplay between exploration and approximation, what we call approximate exploration. Our main goal is to further our theoretical understanding of pseudo-count based exploration bonuses (Bellemare et al., 2016), a practical exploration scheme based on density modelling. As a warm-up, we quantify the performance of an exploration algorithm, MBIE-EB (Strehl and Littman, 2008), when explicitly combined with state aggregation. "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.09819","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1808.09819","created_at":"2026-05-17T23:55:38.401419+00:00"},{"alias_kind":"arxiv_version","alias_value":"1808.09819v2","created_at":"2026-05-17T23:55:38.401419+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.09819","created_at":"2026-05-17T23:55:38.401419+00:00"},{"alias_kind":"pith_short_12","alias_value":"2FODTPQPQWGV","created_at":"2026-05-18T12:32:02.567920+00:00"},{"alias_kind":"pith_short_16","alias_value":"2FODTPQPQWGVBOPJ","created_at":"2026-05-18T12:32:02.567920+00:00"},{"alias_kind":"pith_short_8","alias_value":"2FODTPQP","created_at":"2026-05-18T12:32:02.567920+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM","json":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM.json","graph_json":"https://pith.science/api/pith-number/2FODTPQPQWGVBOPJFNHT7G6XAM/graph.json","events_json":"https://pith.science/api/pith-number/2FODTPQPQWGVBOPJFNHT7G6XAM/events.json","paper":"https://pith.science/paper/2FODTPQP"},"agent_actions":{"view_html":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM","download_json":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM.json","view_paper":"https://pith.science/paper/2FODTPQP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1808.09819&json=true","fetch_graph":"https://pith.science/api/pith-number/2FODTPQPQWGVBOPJFNHT7G6XAM/graph.json","fetch_events":"https://pith.science/api/pith-number/2FODTPQPQWGVBOPJFNHT7G6XAM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM/action/storage_attestation","attest_author":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM/action/author_attestation","sign_citation":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM/action/citation_signature","submit_replication":"https://pith.science/pith/2FODTPQPQWGVBOPJFNHT7G6XAM/action/replication_record"}},"created_at":"2026-05-17T23:55:38.401419+00:00","updated_at":"2026-05-17T23:55:38.401419+00:00"}