{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:R437CDWGXIULEYFKUVZM4LHMF5","short_pith_number":"pith:R437CDWG","schema_version":"1.0","canonical_sha256":"8f37f10ec6ba28b260aaa572ce2cec2f501e7e59c27ade5991f18ceba833c685","source":{"kind":"arxiv","id":"1405.3316","version":2},"attestation_state":"computed","paper":{"title":"Optimal Exploration-Exploitation in a Multi-Armed-Bandit Problem with Non-stationary Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","math.PR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Assaf Zeevi, Omar Besbes, Yonatan Gur","submitted_at":"2014-05-13T22:15:06Z","abstract_excerpt":"In a multi-armed bandit (MAB) problem a gambler needs to choose at each round of play one of K arms, each characterized by an unknown reward distribution. Reward realizations are only observed when an arm is selected, and the gambler's objective is to maximize his cumulative expected earnings over some given horizon of play T. To do this, the gambler needs to acquire information about arms (exploration) while simultaneously optimizing immediate rewards (exploitation); the price paid due to this trade off is often referred to as the regret, and the main question is how small can this price be a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1405.3316","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-05-13T22:15:06Z","cross_cats_sorted":["math.OC","math.PR","stat.ML"],"title_canon_sha256":"8bdd579dad52f09c46f0add9ddec0c5a1c58a937386c4756993384f5eff6b66d","abstract_canon_sha256":"4ef868d33e84b32b2cb32f3b59bc405ce02c1a2244412db3af5b05eb58421b91"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:54.496560Z","signature_b64":"Q7R5noKs2QeK5Vm4GrJieakFQkQuukuhymTzHOpWvBsMONluDwkscP7euKbIENCG3Gk5NVxXUw3Ng3XmVRcJBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f37f10ec6ba28b260aaa572ce2cec2f501e7e59c27ade5991f18ceba833c685","last_reissued_at":"2026-05-17T23:43:54.496028Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:54.496028Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Optimal Exploration-Exploitation in a Multi-Armed-Bandit Problem with Non-stationary Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","math.PR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Assaf Zeevi, Omar Besbes, Yonatan Gur","submitted_at":"2014-05-13T22:15:06Z","abstract_excerpt":"In a multi-armed bandit (MAB) problem a gambler needs to choose at each round of play one of K arms, each characterized by an unknown reward distribution. Reward realizations are only observed when an arm is selected, and the gambler's objective is to maximize his cumulative expected earnings over some given horizon of play T. To do this, the gambler needs to acquire information about arms (exploration) while simultaneously optimizing immediate rewards (exploitation); the price paid due to this trade off is often referred to as the regret, and the main question is how small can this price be a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1405.3316","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1405.3316","created_at":"2026-05-17T23:43:54.496129+00:00"},{"alias_kind":"arxiv_version","alias_value":"1405.3316v2","created_at":"2026-05-17T23:43:54.496129+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1405.3316","created_at":"2026-05-17T23:43:54.496129+00:00"},{"alias_kind":"pith_short_12","alias_value":"R437CDWGXIUL","created_at":"2026-05-18T12:28:46.137349+00:00"},{"alias_kind":"pith_short_16","alias_value":"R437CDWGXIULEYFK","created_at":"2026-05-18T12:28:46.137349+00:00"},{"alias_kind":"pith_short_8","alias_value":"R437CDWG","created_at":"2026-05-18T12:28:46.137349+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5","json":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5.json","graph_json":"https://pith.science/api/pith-number/R437CDWGXIULEYFKUVZM4LHMF5/graph.json","events_json":"https://pith.science/api/pith-number/R437CDWGXIULEYFKUVZM4LHMF5/events.json","paper":"https://pith.science/paper/R437CDWG"},"agent_actions":{"view_html":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5","download_json":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5.json","view_paper":"https://pith.science/paper/R437CDWG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1405.3316&json=true","fetch_graph":"https://pith.science/api/pith-number/R437CDWGXIULEYFKUVZM4LHMF5/graph.json","fetch_events":"https://pith.science/api/pith-number/R437CDWGXIULEYFKUVZM4LHMF5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5/action/storage_attestation","attest_author":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5/action/author_attestation","sign_citation":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5/action/citation_signature","submit_replication":"https://pith.science/pith/R437CDWGXIULEYFKUVZM4LHMF5/action/replication_record"}},"created_at":"2026-05-17T23:43:54.496129+00:00","updated_at":"2026-05-17T23:43:54.496129+00:00"}