{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:K24JGMXPM5SQXZX3TENPV2IEM6","short_pith_number":"pith:K24JGMXP","schema_version":"1.0","canonical_sha256":"56b89332ef67650be6fb991afae90467ad566931673aa5ea33d19f0cb4ce7229","source":{"kind":"arxiv","id":"1709.07796","version":2},"attestation_state":"computed","paper":{"title":"On overfitting and asymptotic bias in batch reinforcement learning with partial observability","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Damien Ernst, Guillaume Rabusseau, Joelle Pineau, Raphael Fonteneau, Vincent Francois-Lavet","submitted_at":"2017-09-22T14:56:35Z","abstract_excerpt":"This paper provides an analysis of the tradeoff between asymptotic bias (suboptimality with unlimited data) and overfitting (additional suboptimality due to limited data) in the context of reinforcement learning with partial observability. Our theoretical analysis formally characterizes that while potentially increasing the asymptotic bias, a smaller state representation decreases the risk of overfitting. This analysis relies on expressing the quality of a state representation by bounding L1 error terms of the associated belief states. Theoretical results are empirically illustrated when the s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1709.07796","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-09-22T14:56:35Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"9d6d16a12efccba1b3fbf5ff5c07a99320a8a0424e5a8f45f27f6905e477d914","abstract_canon_sha256":"a8b950fc4144c6c24344c0199c739131462e53a1de18cee26ac8cbac94870610"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:41.769360Z","signature_b64":"OxOhFuG5orD5C27lTMlB71Pyj0JiX79IIowLh3ckRcWZEkv9Be2W10xCy8i4BAUlYGmnKw2m5zlhDGWzHyD7AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"56b89332ef67650be6fb991afae90467ad566931673aa5ea33d19f0cb4ce7229","last_reissued_at":"2026-05-17T23:54:41.768858Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:41.768858Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"On overfitting and asymptotic bias in batch reinforcement learning with partial observability","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Damien Ernst, Guillaume Rabusseau, Joelle Pineau, Raphael Fonteneau, Vincent Francois-Lavet","submitted_at":"2017-09-22T14:56:35Z","abstract_excerpt":"This paper provides an analysis of the tradeoff between asymptotic bias (suboptimality with unlimited data) and overfitting (additional suboptimality due to limited data) in the context of reinforcement learning with partial observability. Our theoretical analysis formally characterizes that while potentially increasing the asymptotic bias, a smaller state representation decreases the risk of overfitting. This analysis relies on expressing the quality of a state representation by bounding L1 error terms of the associated belief states. Theoretical results are empirically illustrated when the s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.07796","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1709.07796","created_at":"2026-05-17T23:54:41.768935+00:00"},{"alias_kind":"arxiv_version","alias_value":"1709.07796v2","created_at":"2026-05-17T23:54:41.768935+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.07796","created_at":"2026-05-17T23:54:41.768935+00:00"},{"alias_kind":"pith_short_12","alias_value":"K24JGMXPM5SQ","created_at":"2026-05-18T12:31:24.725408+00:00"},{"alias_kind":"pith_short_16","alias_value":"K24JGMXPM5SQXZX3","created_at":"2026-05-18T12:31:24.725408+00:00"},{"alias_kind":"pith_short_8","alias_value":"K24JGMXP","created_at":"2026-05-18T12:31:24.725408+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6","json":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6.json","graph_json":"https://pith.science/api/pith-number/K24JGMXPM5SQXZX3TENPV2IEM6/graph.json","events_json":"https://pith.science/api/pith-number/K24JGMXPM5SQXZX3TENPV2IEM6/events.json","paper":"https://pith.science/paper/K24JGMXP"},"agent_actions":{"view_html":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6","download_json":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6.json","view_paper":"https://pith.science/paper/K24JGMXP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1709.07796&json=true","fetch_graph":"https://pith.science/api/pith-number/K24JGMXPM5SQXZX3TENPV2IEM6/graph.json","fetch_events":"https://pith.science/api/pith-number/K24JGMXPM5SQXZX3TENPV2IEM6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6/action/storage_attestation","attest_author":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6/action/author_attestation","sign_citation":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6/action/citation_signature","submit_replication":"https://pith.science/pith/K24JGMXPM5SQXZX3TENPV2IEM6/action/replication_record"}},"created_at":"2026-05-17T23:54:41.768935+00:00","updated_at":"2026-05-17T23:54:41.768935+00:00"}