{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:AS5ISBTRBRYIJA534JOOB23JRR","short_pith_number":"pith:AS5ISBTR","canonical_record":{"source":{"id":"1902.02778","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-07T18:59:16Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"8bec0318806f3e445e7c3310277fac59c7eafe1595b9fcbd037b396be744876f","abstract_canon_sha256":"29e077f0e618a47c62daa98ede61f82ab544c5613e012d2cb11756b6592e2611"},"schema_version":"1.0"},"canonical_sha256":"04ba8906710c708483bbe25ce0eb698c574773e11765c48f6ae4d42ff53b8d71","source":{"kind":"arxiv","id":"1902.02778","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.02778","created_at":"2026-05-17T23:54:32Z"},{"alias_kind":"arxiv_version","alias_value":"1902.02778v1","created_at":"2026-05-17T23:54:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.02778","created_at":"2026-05-17T23:54:32Z"},{"alias_kind":"pith_short_12","alias_value":"AS5ISBTRBRYI","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"AS5ISBTRBRYIJA53","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"AS5ISBTR","created_at":"2026-05-18T12:33:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:AS5ISBTRBRYIJA534JOOB23JRR","target":"record","payload":{"canonical_record":{"source":{"id":"1902.02778","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-07T18:59:16Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"8bec0318806f3e445e7c3310277fac59c7eafe1595b9fcbd037b396be744876f","abstract_canon_sha256":"29e077f0e618a47c62daa98ede61f82ab544c5613e012d2cb11756b6592e2611"},"schema_version":"1.0"},"canonical_sha256":"04ba8906710c708483bbe25ce0eb698c574773e11765c48f6ae4d42ff53b8d71","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:32.223877Z","signature_b64":"E8Ti7Q9bgrq+80c/zJJ7ERM8oehFhFyuGhnrap+KsCbOPClrMTED2kkC64m/C4YfRpmJX50sf3Bxclsc0k3/BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"04ba8906710c708483bbe25ce0eb698c574773e11765c48f6ae4d42ff53b8d71","last_reissued_at":"2026-05-17T23:54:32.223278Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:32.223278Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1902.02778","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:54:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YbIAc0swFtx2OVu/3Z/jh3fTfzRXPdIrfLXUUgkN5dAozXZWK1l+TL3qsvzPIZVoUQIyhjlXJbrR1uQoqru7BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T03:02:01.807729Z"},"content_sha256":"d195cc2ba9a00f842603932caa7ff8e178d458319f0a68a560b6f637d9491c77","schema_version":"1.0","event_id":"sha256:d195cc2ba9a00f842603932caa7ff8e178d458319f0a68a560b6f637d9491c77"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:AS5ISBTRBRYIJA534JOOB23JRR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"KLUCB Approach to Copeland Bandits","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Nischal Agrawal, Prasanna Chaporkar","submitted_at":"2019-02-07T18:59:16Z","abstract_excerpt":"Multi-armed bandit(MAB) problem is a reinforcement learning framework where an agent tries to maximise her profit by proper selection of actions through absolute feedback for each action. The dueling bandits problem is a variation of MAB problem in which an agent chooses a pair of actions and receives relative feedback for the chosen action pair. The dueling bandits problem is well suited for modelling a setting in which it is not possible to provide quantitative feedback for each action, but qualitative feedback for each action is preferred as in the case of human feedback. The dueling bandit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.02778","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:54:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TxF7TBNuQWvHtvNq1z0SI+2yksB5KmnvMJJl1fsH31CpxmHEd0qbSfjx8+MdH3M6cAlP13pbs+Yl8tw9kE6sAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T03:02:01.808380Z"},"content_sha256":"b0b791ac32ff80114856708c4585a517f23e4d4440691bc2c82b8191c658b489","schema_version":"1.0","event_id":"sha256:b0b791ac32ff80114856708c4585a517f23e4d4440691bc2c82b8191c658b489"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AS5ISBTRBRYIJA534JOOB23JRR/bundle.json","state_url":"https://pith.science/pith/AS5ISBTRBRYIJA534JOOB23JRR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AS5ISBTRBRYIJA534JOOB23JRR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T03:02:01Z","links":{"resolver":"https://pith.science/pith/AS5ISBTRBRYIJA534JOOB23JRR","bundle":"https://pith.science/pith/AS5ISBTRBRYIJA534JOOB23JRR/bundle.json","state":"https://pith.science/pith/AS5ISBTRBRYIJA534JOOB23JRR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AS5ISBTRBRYIJA534JOOB23JRR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:AS5ISBTRBRYIJA534JOOB23JRR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"29e077f0e618a47c62daa98ede61f82ab544c5613e012d2cb11756b6592e2611","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-07T18:59:16Z","title_canon_sha256":"8bec0318806f3e445e7c3310277fac59c7eafe1595b9fcbd037b396be744876f"},"schema_version":"1.0","source":{"id":"1902.02778","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.02778","created_at":"2026-05-17T23:54:32Z"},{"alias_kind":"arxiv_version","alias_value":"1902.02778v1","created_at":"2026-05-17T23:54:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.02778","created_at":"2026-05-17T23:54:32Z"},{"alias_kind":"pith_short_12","alias_value":"AS5ISBTRBRYI","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"AS5ISBTRBRYIJA53","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"AS5ISBTR","created_at":"2026-05-18T12:33:12Z"}],"graph_snapshots":[{"event_id":"sha256:b0b791ac32ff80114856708c4585a517f23e4d4440691bc2c82b8191c658b489","target":"graph","created_at":"2026-05-17T23:54:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Multi-armed bandit(MAB) problem is a reinforcement learning framework where an agent tries to maximise her profit by proper selection of actions through absolute feedback for each action. The dueling bandits problem is a variation of MAB problem in which an agent chooses a pair of actions and receives relative feedback for the chosen action pair. The dueling bandits problem is well suited for modelling a setting in which it is not possible to provide quantitative feedback for each action, but qualitative feedback for each action is preferred as in the case of human feedback. The dueling bandit","authors_text":"Nischal Agrawal, Prasanna Chaporkar","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-07T18:59:16Z","title":"KLUCB Approach to Copeland Bandits"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.02778","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d195cc2ba9a00f842603932caa7ff8e178d458319f0a68a560b6f637d9491c77","target":"record","created_at":"2026-05-17T23:54:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"29e077f0e618a47c62daa98ede61f82ab544c5613e012d2cb11756b6592e2611","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-07T18:59:16Z","title_canon_sha256":"8bec0318806f3e445e7c3310277fac59c7eafe1595b9fcbd037b396be744876f"},"schema_version":"1.0","source":{"id":"1902.02778","kind":"arxiv","version":1}},"canonical_sha256":"04ba8906710c708483bbe25ce0eb698c574773e11765c48f6ae4d42ff53b8d71","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"04ba8906710c708483bbe25ce0eb698c574773e11765c48f6ae4d42ff53b8d71","first_computed_at":"2026-05-17T23:54:32.223278Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:54:32.223278Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"E8Ti7Q9bgrq+80c/zJJ7ERM8oehFhFyuGhnrap+KsCbOPClrMTED2kkC64m/C4YfRpmJX50sf3Bxclsc0k3/BA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:54:32.223877Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.02778","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d195cc2ba9a00f842603932caa7ff8e178d458319f0a68a560b6f637d9491c77","sha256:b0b791ac32ff80114856708c4585a517f23e4d4440691bc2c82b8191c658b489"],"state_sha256":"b35891e0f042c390fd4656ae078353712aaedb697db6b8f2d560d648a02f80d9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2GXSA2CRszyFhrk0Wh8R9ILlYybfT/zC7ZjCbJ7i4Qum92G9+FoBCFSfBI0I7rnsjPKJkO7fx+B/eC3otiEsCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T03:02:01.811564Z","bundle_sha256":"4982f970b3d85bf4a9269b0e54e76ca86575ab67e016166c0a51a3393469b74a"}}