{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:45BBJE7VYARV7UFUNGI3LNZV2P","short_pith_number":"pith:45BBJE7V","canonical_record":{"source":{"id":"1806.00892","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-06-03T23:20:29Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"515e8c0eecc293ebd77737b59bfca12c5f45ac855b720b09b1a3351aaa76d882","abstract_canon_sha256":"d706ba2cd64cca8348f45831cd8663f9f823a7b4d231c850a0b93a0e47f9c113"},"schema_version":"1.0"},"canonical_sha256":"e7421493f5c0235fd0b46991b5b735d3c66c9d2866004dda45ada2ccc4fca073","source":{"kind":"arxiv","id":"1806.00892","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.00892","created_at":"2026-05-18T00:14:18Z"},{"alias_kind":"arxiv_version","alias_value":"1806.00892v1","created_at":"2026-05-18T00:14:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00892","created_at":"2026-05-18T00:14:18Z"},{"alias_kind":"pith_short_12","alias_value":"45BBJE7VYARV","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"45BBJE7VYARV7UFU","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"45BBJE7V","created_at":"2026-05-18T12:32:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:45BBJE7VYARV7UFUNGI3LNZV2P","target":"record","payload":{"canonical_record":{"source":{"id":"1806.00892","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-06-03T23:20:29Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"515e8c0eecc293ebd77737b59bfca12c5f45ac855b720b09b1a3351aaa76d882","abstract_canon_sha256":"d706ba2cd64cca8348f45831cd8663f9f823a7b4d231c850a0b93a0e47f9c113"},"schema_version":"1.0"},"canonical_sha256":"e7421493f5c0235fd0b46991b5b735d3c66c9d2866004dda45ada2ccc4fca073","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:14:18.103457Z","signature_b64":"24US3p94zPcFqOhwVPSQe/xFOoS5RtCtulgpreNe9/SLQuomjMHfGe9pAjeX6jwiHpQBe6vNukD4hlixlw5MCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e7421493f5c0235fd0b46991b5b735d3c66c9d2866004dda45ada2ccc4fca073","last_reissued_at":"2026-05-18T00:14:18.102987Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:14:18.102987Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.00892","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8g5SDI1lvncmS/YF4918uy0KjAb/vjLofF8IW1ww13oSRm3QZDxPLkC2qdY99lHGipwKG872AOpsdl/bIM5mCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T11:14:42.923472Z"},"content_sha256":"b2b7bd57ee680828b0d3f636c2cbc2d5ae22380af00dbae3ae4bc2034f50bce6","schema_version":"1.0","event_id":"sha256:b2b7bd57ee680828b0d3f636c2cbc2d5ae22380af00dbae3ae4bc2034f50bce6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:45BBJE7VYARV7UFUNGI3LNZV2P","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Conservative Exploration using Interleaving","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Branislav Kveton, Sumeet Katariya, Vamsi K. Potluru, Zheng Wen","submitted_at":"2018-06-03T23:20:29Z","abstract_excerpt":"In many practical problems, a learning agent may want to learn the best action in hindsight without ever taking a bad action, which is significantly worse than the default production action. In general, this is impossible because the agent has to explore unknown actions, some of which can be bad, to learn better actions. However, when the actions are combinatorial, this may be possible if the unknown action can be evaluated by interleaving it with the production action. We formalize this concept as learning in stochastic combinatorial semi-bandits with exchangeable actions. We design efficient"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.00892","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dlGJrhJZeynm423o4CYIVVcVLAaGse8AyZkBreYNW4e+mEYP738bbecwxIAeWfNgRx6riXXHA7ZxWr7wEl2sAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T11:14:42.923820Z"},"content_sha256":"a1bd3feac5bfc522385557e31f92815a230e7a9d21aaaf27318a345288302b87","schema_version":"1.0","event_id":"sha256:a1bd3feac5bfc522385557e31f92815a230e7a9d21aaaf27318a345288302b87"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/45BBJE7VYARV7UFUNGI3LNZV2P/bundle.json","state_url":"https://pith.science/pith/45BBJE7VYARV7UFUNGI3LNZV2P/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/45BBJE7VYARV7UFUNGI3LNZV2P/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T11:14:42Z","links":{"resolver":"https://pith.science/pith/45BBJE7VYARV7UFUNGI3LNZV2P","bundle":"https://pith.science/pith/45BBJE7VYARV7UFUNGI3LNZV2P/bundle.json","state":"https://pith.science/pith/45BBJE7VYARV7UFUNGI3LNZV2P/state.json","well_known_bundle":"https://pith.science/.well-known/pith/45BBJE7VYARV7UFUNGI3LNZV2P/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:45BBJE7VYARV7UFUNGI3LNZV2P","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d706ba2cd64cca8348f45831cd8663f9f823a7b4d231c850a0b93a0e47f9c113","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-06-03T23:20:29Z","title_canon_sha256":"515e8c0eecc293ebd77737b59bfca12c5f45ac855b720b09b1a3351aaa76d882"},"schema_version":"1.0","source":{"id":"1806.00892","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.00892","created_at":"2026-05-18T00:14:18Z"},{"alias_kind":"arxiv_version","alias_value":"1806.00892v1","created_at":"2026-05-18T00:14:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00892","created_at":"2026-05-18T00:14:18Z"},{"alias_kind":"pith_short_12","alias_value":"45BBJE7VYARV","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"45BBJE7VYARV7UFU","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"45BBJE7V","created_at":"2026-05-18T12:32:05Z"}],"graph_snapshots":[{"event_id":"sha256:a1bd3feac5bfc522385557e31f92815a230e7a9d21aaaf27318a345288302b87","target":"graph","created_at":"2026-05-18T00:14:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In many practical problems, a learning agent may want to learn the best action in hindsight without ever taking a bad action, which is significantly worse than the default production action. In general, this is impossible because the agent has to explore unknown actions, some of which can be bad, to learn better actions. However, when the actions are combinatorial, this may be possible if the unknown action can be evaluated by interleaving it with the production action. We formalize this concept as learning in stochastic combinatorial semi-bandits with exchangeable actions. We design efficient","authors_text":"Branislav Kveton, Sumeet Katariya, Vamsi K. Potluru, Zheng Wen","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-06-03T23:20:29Z","title":"Conservative Exploration using Interleaving"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.00892","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b2b7bd57ee680828b0d3f636c2cbc2d5ae22380af00dbae3ae4bc2034f50bce6","target":"record","created_at":"2026-05-18T00:14:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d706ba2cd64cca8348f45831cd8663f9f823a7b4d231c850a0b93a0e47f9c113","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-06-03T23:20:29Z","title_canon_sha256":"515e8c0eecc293ebd77737b59bfca12c5f45ac855b720b09b1a3351aaa76d882"},"schema_version":"1.0","source":{"id":"1806.00892","kind":"arxiv","version":1}},"canonical_sha256":"e7421493f5c0235fd0b46991b5b735d3c66c9d2866004dda45ada2ccc4fca073","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e7421493f5c0235fd0b46991b5b735d3c66c9d2866004dda45ada2ccc4fca073","first_computed_at":"2026-05-18T00:14:18.102987Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:14:18.102987Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"24US3p94zPcFqOhwVPSQe/xFOoS5RtCtulgpreNe9/SLQuomjMHfGe9pAjeX6jwiHpQBe6vNukD4hlixlw5MCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:14:18.103457Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.00892","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b2b7bd57ee680828b0d3f636c2cbc2d5ae22380af00dbae3ae4bc2034f50bce6","sha256:a1bd3feac5bfc522385557e31f92815a230e7a9d21aaaf27318a345288302b87"],"state_sha256":"8c5385c647b5b0b6577ead642dc58c9c1231470abc635326dbafe54df4d7662d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WEIhoVBwUFxlmBZDdU8QIdUjr4nKf/6DKY6e24d30/yHaD9ysVy6QKQ0zNvH8ohdFBz89RPYWBmk8M6vY/bGCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T11:14:42.925849Z","bundle_sha256":"bb82132d4115c4263f7a909b559cd80feca675adf06933a175f3b5b5cc3dabe6"}}