{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:WJWSWF5MMJZTN72HMBLGBNSKRA","short_pith_number":"pith:WJWSWF5M","schema_version":"1.0","canonical_sha256":"b26d2b17ac627336ff47605660b64a880ff6f4a95dbed2f1289bf1f932ce908c","source":{"kind":"arxiv","id":"1802.09127","version":1},"attestation_state":"computed","paper":{"title":"Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Carlos Riquelme, George Tucker, Jasper Snoek","submitted_at":"2018-02-26T02:04:57Z","abstract_excerpt":"Recent advances in deep reinforcement learning have made significant strides in performance on applications such as Go and Atari games. However, developing practical methods to balance exploration and exploitation in complex domains remains largely unsolved. Thompson Sampling and its extension to reinforcement learning provide an elegant approach to exploration that only requires access to posterior samples of the model. At the same time, advances in approximate Bayesian methods have made posterior approximation for flexible neural network models practical. Thus, it is attractive to consider a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.09127","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-02-26T02:04:57Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"9405e7320da7ad5d43df69ab24a31abcccd22e147a0c6f5ab14667d903340e19","abstract_canon_sha256":"c35508a5a28fc31e5344039b853bcaceb7a5884e2cc8d377ee595125c3d65e3a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:34.694652Z","signature_b64":"Epu/dHa4cAtz9RnJtvoko3GWj3t74GcWYJVurOQoCQxqezQ3vt5AnMNqkli2N135ALxlpq1y1LH/NXgV99TfDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b26d2b17ac627336ff47605660b64a880ff6f4a95dbed2f1289bf1f932ce908c","last_reissued_at":"2026-05-18T00:22:34.693991Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:34.693991Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Carlos Riquelme, George Tucker, Jasper Snoek","submitted_at":"2018-02-26T02:04:57Z","abstract_excerpt":"Recent advances in deep reinforcement learning have made significant strides in performance on applications such as Go and Atari games. However, developing practical methods to balance exploration and exploitation in complex domains remains largely unsolved. Thompson Sampling and its extension to reinforcement learning provide an elegant approach to exploration that only requires access to posterior samples of the model. At the same time, advances in approximate Bayesian methods have made posterior approximation for flexible neural network models practical. Thus, it is attractive to consider a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.09127","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.09127","created_at":"2026-05-18T00:22:34.694081+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.09127v1","created_at":"2026-05-18T00:22:34.694081+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.09127","created_at":"2026-05-18T00:22:34.694081+00:00"},{"alias_kind":"pith_short_12","alias_value":"WJWSWF5MMJZT","created_at":"2026-05-18T12:32:59.047623+00:00"},{"alias_kind":"pith_short_16","alias_value":"WJWSWF5MMJZTN72H","created_at":"2026-05-18T12:32:59.047623+00:00"},{"alias_kind":"pith_short_8","alias_value":"WJWSWF5M","created_at":"2026-05-18T12:32:59.047623+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1906.11537","citing_title":"'In-Between' Uncertainty in Bayesian Neural Networks","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2305.03784","citing_title":"Neural Exploitation and Exploration of Contextual Bandits","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2503.13113","citing_title":"Exploring the Potential of Bilevel Optimization for Calibrating Neural Networks","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03404","citing_title":"Diffusion Policy with Bayesian Expert Selection for Active Multi-Target Tracking","ref_index":45,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10784","citing_title":"MASS-DPO: Multi-negative Active Sample Selection for Direct Policy Optimization","ref_index":51,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA","json":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA.json","graph_json":"https://pith.science/api/pith-number/WJWSWF5MMJZTN72HMBLGBNSKRA/graph.json","events_json":"https://pith.science/api/pith-number/WJWSWF5MMJZTN72HMBLGBNSKRA/events.json","paper":"https://pith.science/paper/WJWSWF5M"},"agent_actions":{"view_html":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA","download_json":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA.json","view_paper":"https://pith.science/paper/WJWSWF5M","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.09127&json=true","fetch_graph":"https://pith.science/api/pith-number/WJWSWF5MMJZTN72HMBLGBNSKRA/graph.json","fetch_events":"https://pith.science/api/pith-number/WJWSWF5MMJZTN72HMBLGBNSKRA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA/action/storage_attestation","attest_author":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA/action/author_attestation","sign_citation":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA/action/citation_signature","submit_replication":"https://pith.science/pith/WJWSWF5MMJZTN72HMBLGBNSKRA/action/replication_record"}},"created_at":"2026-05-18T00:22:34.694081+00:00","updated_at":"2026-05-18T00:22:34.694081+00:00"}