{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:SDYYAM4UQJEITPUFUSFKAXIAD4","short_pith_number":"pith:SDYYAM4U","schema_version":"1.0","canonical_sha256":"90f1803394824889be85a48aa05d001f148e5c4490963e9f68767ca76302c0c0","source":{"kind":"arxiv","id":"1505.00146","version":1},"attestation_state":"computed","paper":{"title":"Thompson Sampling for Budgeted Multi-armed Bandits","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Haifang Li, Nenghai Yu, Tao Qin, Tie-Yan Liu, Yingce Xia","submitted_at":"2015-05-01T10:35:35Z","abstract_excerpt":"Thompson sampling is one of the earliest randomized algorithms for multi-armed bandits (MAB). In this paper, we extend the Thompson sampling to Budgeted MAB, where there is random cost for pulling an arm and the total cost is constrained by a budget. We start with the case of Bernoulli bandits, in which the random rewards (costs) of an arm are independently sampled from a Bernoulli distribution. To implement the Thompson sampling algorithm in this case, at each round, we sample two numbers from the posterior distributions of the reward and cost for each arm, obtain their ratio, select the arm "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1505.00146","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-05-01T10:35:35Z","cross_cats_sorted":[],"title_canon_sha256":"da575a1efd446d0570b17b1265526c09ee1da855e2c24b605b24ad758e4b0ae1","abstract_canon_sha256":"bc7941a6e5f04a4d91183a3c9637e93e7e452dc15fae947f8e2209dd2d28c6cd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:17:13.388216Z","signature_b64":"g+k6DU2Vg3j48Ta/h+U6XZZXhotEP3/ncwWOeLDLypnBvOd99EON/zWhauWHkb+lwWBNEiJPisBhKl9ortbrCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"90f1803394824889be85a48aa05d001f148e5c4490963e9f68767ca76302c0c0","last_reissued_at":"2026-05-18T02:17:13.387442Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:17:13.387442Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Thompson Sampling for Budgeted Multi-armed Bandits","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Haifang Li, Nenghai Yu, Tao Qin, Tie-Yan Liu, Yingce Xia","submitted_at":"2015-05-01T10:35:35Z","abstract_excerpt":"Thompson sampling is one of the earliest randomized algorithms for multi-armed bandits (MAB). In this paper, we extend the Thompson sampling to Budgeted MAB, where there is random cost for pulling an arm and the total cost is constrained by a budget. We start with the case of Bernoulli bandits, in which the random rewards (costs) of an arm are independently sampled from a Bernoulli distribution. To implement the Thompson sampling algorithm in this case, at each round, we sample two numbers from the posterior distributions of the reward and cost for each arm, obtain their ratio, select the arm "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.00146","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1505.00146","created_at":"2026-05-18T02:17:13.387569+00:00"},{"alias_kind":"arxiv_version","alias_value":"1505.00146v1","created_at":"2026-05-18T02:17:13.387569+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.00146","created_at":"2026-05-18T02:17:13.387569+00:00"},{"alias_kind":"pith_short_12","alias_value":"SDYYAM4UQJEI","created_at":"2026-05-18T12:29:39.896362+00:00"},{"alias_kind":"pith_short_16","alias_value":"SDYYAM4UQJEITPUF","created_at":"2026-05-18T12:29:39.896362+00:00"},{"alias_kind":"pith_short_8","alias_value":"SDYYAM4U","created_at":"2026-05-18T12:29:39.896362+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2508.13657","citing_title":"In-Context Decision Making for Optimizing Complex AutoML Pipelines","ref_index":56,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19672","citing_title":"Budgeted Online Influence Maximization","ref_index":33,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4","json":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4.json","graph_json":"https://pith.science/api/pith-number/SDYYAM4UQJEITPUFUSFKAXIAD4/graph.json","events_json":"https://pith.science/api/pith-number/SDYYAM4UQJEITPUFUSFKAXIAD4/events.json","paper":"https://pith.science/paper/SDYYAM4U"},"agent_actions":{"view_html":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4","download_json":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4.json","view_paper":"https://pith.science/paper/SDYYAM4U","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1505.00146&json=true","fetch_graph":"https://pith.science/api/pith-number/SDYYAM4UQJEITPUFUSFKAXIAD4/graph.json","fetch_events":"https://pith.science/api/pith-number/SDYYAM4UQJEITPUFUSFKAXIAD4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4/action/storage_attestation","attest_author":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4/action/author_attestation","sign_citation":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4/action/citation_signature","submit_replication":"https://pith.science/pith/SDYYAM4UQJEITPUFUSFKAXIAD4/action/replication_record"}},"created_at":"2026-05-18T02:17:13.387569+00:00","updated_at":"2026-05-18T02:17:13.387569+00:00"}