{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2013:32EPSIB7YJNRUMTBRN2FV355UI","short_pith_number":"pith:32EPSIB7","schema_version":"1.0","canonical_sha256":"de88f9203fc25b1a32618b745aefbda2083c7fb91f40da4e2b722b9b87dd87b7","source":{"kind":"arxiv","id":"1311.0466","version":1},"attestation_state":"computed","paper":{"title":"Thompson Sampling for Complex Bandit Problems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Aditya Gopalan, Shie Mannor, Yishay Mansour","submitted_at":"2013-11-03T13:51:55Z","abstract_excerpt":"We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thomps"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1311.0466","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2013-11-03T13:51:55Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"532829dcba137dece484e427562e3ae3debb4af6fce37c75a8c028e9ac33b2b4","abstract_canon_sha256":"9f7e91c84b821b1c9c154c8f4b090094a41bbc11d4ffdb8e65409edb0db1c5b7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:08:05.863208Z","signature_b64":"sj+zLnTlE8sMXNzh6/sEgoPk9ukrS4OPOyi+akSFVN3NnLXoK/9bDmtaMJ7gDgrrXv/6S13oYm/MT7/S9JmxAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"de88f9203fc25b1a32618b745aefbda2083c7fb91f40da4e2b722b9b87dd87b7","last_reissued_at":"2026-05-18T03:08:05.862661Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:08:05.862661Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Thompson Sampling for Complex Bandit Problems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Aditya Gopalan, Shie Mannor, Yishay Mansour","submitted_at":"2013-11-03T13:51:55Z","abstract_excerpt":"We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thomps"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1311.0466","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1311.0466","created_at":"2026-05-18T03:08:05.862743+00:00"},{"alias_kind":"arxiv_version","alias_value":"1311.0466v1","created_at":"2026-05-18T03:08:05.862743+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1311.0466","created_at":"2026-05-18T03:08:05.862743+00:00"},{"alias_kind":"pith_short_12","alias_value":"32EPSIB7YJNR","created_at":"2026-05-18T12:27:32.513160+00:00"},{"alias_kind":"pith_short_16","alias_value":"32EPSIB7YJNRUMTB","created_at":"2026-05-18T12:27:32.513160+00:00"},{"alias_kind":"pith_short_8","alias_value":"32EPSIB7","created_at":"2026-05-18T12:27:32.513160+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI","json":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI.json","graph_json":"https://pith.science/api/pith-number/32EPSIB7YJNRUMTBRN2FV355UI/graph.json","events_json":"https://pith.science/api/pith-number/32EPSIB7YJNRUMTBRN2FV355UI/events.json","paper":"https://pith.science/paper/32EPSIB7"},"agent_actions":{"view_html":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI","download_json":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI.json","view_paper":"https://pith.science/paper/32EPSIB7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1311.0466&json=true","fetch_graph":"https://pith.science/api/pith-number/32EPSIB7YJNRUMTBRN2FV355UI/graph.json","fetch_events":"https://pith.science/api/pith-number/32EPSIB7YJNRUMTBRN2FV355UI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI/action/storage_attestation","attest_author":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI/action/author_attestation","sign_citation":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI/action/citation_signature","submit_replication":"https://pith.science/pith/32EPSIB7YJNRUMTBRN2FV355UI/action/replication_record"}},"created_at":"2026-05-18T03:08:05.862743+00:00","updated_at":"2026-05-18T03:08:05.862743+00:00"}