{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:WCSQQ5AB53THKBSYMGDKHOU5PB","short_pith_number":"pith:WCSQQ5AB","schema_version":"1.0","canonical_sha256":"b0a5087401eee67506586186a3ba9d78768671472db37c5b29292c3eef91fd00","source":{"kind":"arxiv","id":"1502.02362","version":2},"attestation_state":"computed","paper":{"title":"Counterfactual Risk Minimization: Learning from Logged Bandit Feedback","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Adith Swaminathan, Thorsten Joachims","submitted_at":"2015-02-09T05:09:25Z","abstract_excerpt":"We develop a learning principle and an efficient algorithm for batch learning from logged bandit feedback. This learning setting is ubiquitous in online systems (e.g., ad placement, web search, recommendation), where an algorithm makes a prediction (e.g., ad ranking) for a given input (e.g., query) and observes bandit feedback (e.g., user clicks on presented ads). We first address the counterfactual nature of the learning problem through propensity scoring. Next, we prove generalization error bounds that account for the variance of the propensity-weighted empirical risk estimator. These constr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1502.02362","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-02-09T05:09:25Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"8a9739de27cf2da5a3028bc655b9bf93c9324de670e28cc2f102991ffc3032bd","abstract_canon_sha256":"611913bb6505bcf136cc3ce5c217d47a2e653181bbd5997140a4708206bd39f7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:03:54.906525Z","signature_b64":"o4/wDHiUAYINatWCBR5tFWxyyuxeOyeU8E4cjGPdJWUuPDmUN/TJNsxG+eZxhbnAs5g895b7tnu7CtWYrnrNDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b0a5087401eee67506586186a3ba9d78768671472db37c5b29292c3eef91fd00","last_reissued_at":"2026-05-18T02:03:54.905892Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:03:54.905892Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Counterfactual Risk Minimization: Learning from Logged Bandit Feedback","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Adith Swaminathan, Thorsten Joachims","submitted_at":"2015-02-09T05:09:25Z","abstract_excerpt":"We develop a learning principle and an efficient algorithm for batch learning from logged bandit feedback. This learning setting is ubiquitous in online systems (e.g., ad placement, web search, recommendation), where an algorithm makes a prediction (e.g., ad ranking) for a given input (e.g., query) and observes bandit feedback (e.g., user clicks on presented ads). We first address the counterfactual nature of the learning problem through propensity scoring. Next, we prove generalization error bounds that account for the variance of the propensity-weighted empirical risk estimator. These constr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1502.02362","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1502.02362","created_at":"2026-05-18T02:03:54.905994+00:00"},{"alias_kind":"arxiv_version","alias_value":"1502.02362v2","created_at":"2026-05-18T02:03:54.905994+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1502.02362","created_at":"2026-05-18T02:03:54.905994+00:00"},{"alias_kind":"pith_short_12","alias_value":"WCSQQ5AB53TH","created_at":"2026-05-18T12:29:47.479230+00:00"},{"alias_kind":"pith_short_16","alias_value":"WCSQQ5AB53THKBSY","created_at":"2026-05-18T12:29:47.479230+00:00"},{"alias_kind":"pith_short_8","alias_value":"WCSQQ5AB","created_at":"2026-05-18T12:29:47.479230+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"1606.06565","citing_title":"Concrete Problems in AI Safety","ref_index":152,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB","json":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB.json","graph_json":"https://pith.science/api/pith-number/WCSQQ5AB53THKBSYMGDKHOU5PB/graph.json","events_json":"https://pith.science/api/pith-number/WCSQQ5AB53THKBSYMGDKHOU5PB/events.json","paper":"https://pith.science/paper/WCSQQ5AB"},"agent_actions":{"view_html":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB","download_json":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB.json","view_paper":"https://pith.science/paper/WCSQQ5AB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1502.02362&json=true","fetch_graph":"https://pith.science/api/pith-number/WCSQQ5AB53THKBSYMGDKHOU5PB/graph.json","fetch_events":"https://pith.science/api/pith-number/WCSQQ5AB53THKBSYMGDKHOU5PB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB/action/storage_attestation","attest_author":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB/action/author_attestation","sign_citation":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB/action/citation_signature","submit_replication":"https://pith.science/pith/WCSQQ5AB53THKBSYMGDKHOU5PB/action/replication_record"}},"created_at":"2026-05-18T02:03:54.905994+00:00","updated_at":"2026-05-18T02:03:54.905994+00:00"}