{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:CO3SRGKYTPPBDPAQA2SI2JZGXH","short_pith_number":"pith:CO3SRGKY","schema_version":"1.0","canonical_sha256":"13b72899589bde11bc1006a48d2726b9e6416713c1c5efa2ae850e7f7986005e","source":{"kind":"arxiv","id":"1510.02874","version":1},"attestation_state":"computed","paper":{"title":"TSEB: More Efficient Thompson Sampling for Policy Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Balaraman Ravindran, P. Prasanna, Sarath Chandar","submitted_at":"2015-10-10T04:16:08Z","abstract_excerpt":"In model-based solution approaches to the problem of learning in an unknown environment, exploring to learn the model parameters takes a toll on the regret. The optimal performance with respect to regret or PAC bounds is achievable, if the algorithm exploits with respect to reward or explores with respect to the model parameters, respectively. In this paper, we propose TSEB, a Thompson Sampling based algorithm with adaptive exploration bonus that aims to solve the problem with tighter PAC guarantees, while being cautious on the regret as well. The proposed approach maintains distributions over"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1510.02874","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-10-10T04:16:08Z","cross_cats_sorted":[],"title_canon_sha256":"d370ce1b7478adf01b7483d62a008d5ff21b5b6bade1f7a424159ed25915f96d","abstract_canon_sha256":"8923ee0129bfeba63a0badfc05a87aac4146e154fa077e5d259c570978e0817e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:30:33.510223Z","signature_b64":"fyhh7xrAO1BzsTGNdu7T9ou8ajjMkcRjJTZlw1ucSmE7HrYHAcMUy7YjF75Jwc0db1hE4n9MVmBXZZK2nejQBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"13b72899589bde11bc1006a48d2726b9e6416713c1c5efa2ae850e7f7986005e","last_reissued_at":"2026-05-18T01:30:33.509599Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:30:33.509599Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TSEB: More Efficient Thompson Sampling for Policy Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Balaraman Ravindran, P. Prasanna, Sarath Chandar","submitted_at":"2015-10-10T04:16:08Z","abstract_excerpt":"In model-based solution approaches to the problem of learning in an unknown environment, exploring to learn the model parameters takes a toll on the regret. The optimal performance with respect to regret or PAC bounds is achievable, if the algorithm exploits with respect to reward or explores with respect to the model parameters, respectively. In this paper, we propose TSEB, a Thompson Sampling based algorithm with adaptive exploration bonus that aims to solve the problem with tighter PAC guarantees, while being cautious on the regret as well. The proposed approach maintains distributions over"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1510.02874","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1510.02874","created_at":"2026-05-18T01:30:33.509686+00:00"},{"alias_kind":"arxiv_version","alias_value":"1510.02874v1","created_at":"2026-05-18T01:30:33.509686+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1510.02874","created_at":"2026-05-18T01:30:33.509686+00:00"},{"alias_kind":"pith_short_12","alias_value":"CO3SRGKYTPPB","created_at":"2026-05-18T12:29:17.054201+00:00"},{"alias_kind":"pith_short_16","alias_value":"CO3SRGKYTPPBDPAQ","created_at":"2026-05-18T12:29:17.054201+00:00"},{"alias_kind":"pith_short_8","alias_value":"CO3SRGKY","created_at":"2026-05-18T12:29:17.054201+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH","json":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH.json","graph_json":"https://pith.science/api/pith-number/CO3SRGKYTPPBDPAQA2SI2JZGXH/graph.json","events_json":"https://pith.science/api/pith-number/CO3SRGKYTPPBDPAQA2SI2JZGXH/events.json","paper":"https://pith.science/paper/CO3SRGKY"},"agent_actions":{"view_html":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH","download_json":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH.json","view_paper":"https://pith.science/paper/CO3SRGKY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1510.02874&json=true","fetch_graph":"https://pith.science/api/pith-number/CO3SRGKYTPPBDPAQA2SI2JZGXH/graph.json","fetch_events":"https://pith.science/api/pith-number/CO3SRGKYTPPBDPAQA2SI2JZGXH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH/action/storage_attestation","attest_author":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH/action/author_attestation","sign_citation":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH/action/citation_signature","submit_replication":"https://pith.science/pith/CO3SRGKYTPPBDPAQA2SI2JZGXH/action/replication_record"}},"created_at":"2026-05-18T01:30:33.509686+00:00","updated_at":"2026-05-18T01:30:33.509686+00:00"}