{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:3JWPUXRCHNCZ6EL6TORSCMSGAM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"995bc66d02acf5a976abb8fbaee3f22c2aa691e319aa1b591a6791bdcb5a2090","cross_cats_sorted":["cs.AI","cs.HC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-07-24T16:36:04Z","title_canon_sha256":"e2b243fff97332797215da0a9b28e0445a6f8934d3e659893dc2e373ae5ce5e2"},"schema_version":"1.0","source":{"id":"2307.12926","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2307.12926","created_at":"2026-07-05T06:34:00Z"},{"alias_kind":"arxiv_version","alias_value":"2307.12926v1","created_at":"2026-07-05T06:34:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2307.12926","created_at":"2026-07-05T06:34:00Z"},{"alias_kind":"pith_short_12","alias_value":"3JWPUXRCHNCZ","created_at":"2026-07-05T06:34:00Z"},{"alias_kind":"pith_short_16","alias_value":"3JWPUXRCHNCZ6EL6","created_at":"2026-07-05T06:34:00Z"},{"alias_kind":"pith_short_8","alias_value":"3JWPUXRC","created_at":"2026-07-05T06:34:00Z"}],"graph_snapshots":[{"event_id":"sha256:87050b2de5db136d61f7df3cc3ac94dfa48cd3155a66169cdd3ea330c2cb9778","target":"graph","created_at":"2026-07-05T06:34:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2307.12926/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We consider the problem of contextual bandits and imitation learning, where the learner lacks direct knowledge of the executed action's reward. Instead, the learner can actively query an expert at each round to compare two actions and receive noisy preference feedback. The learner's objective is two-fold: to minimize the regret associated with the executed actions, while simultaneously, minimizing the number of comparison queries made to the expert. In this paper, we assume that the learner has access to a function class that can represent the expert's preference model under appropriate link f","authors_text":"Ayush Sekhari, Karthik Sridharan, Runzhe Wu, Wen Sun","cross_cats":["cs.AI","cs.HC"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-07-24T16:36:04Z","title":"Contextual Bandits and Imitation Learning via Preference-Based Active Queries"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2307.12926","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20c8336ee7a8398ae3b3452d1e5661322a368b290ea85039e48a6810bb94a8dc","target":"record","created_at":"2026-07-05T06:34:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"995bc66d02acf5a976abb8fbaee3f22c2aa691e319aa1b591a6791bdcb5a2090","cross_cats_sorted":["cs.AI","cs.HC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-07-24T16:36:04Z","title_canon_sha256":"e2b243fff97332797215da0a9b28e0445a6f8934d3e659893dc2e373ae5ce5e2"},"schema_version":"1.0","source":{"id":"2307.12926","kind":"arxiv","version":1}},"canonical_sha256":"da6cfa5e223b459f117e9ba32132460312fe5efbdde66ab576de1cc7fbc48ff4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"da6cfa5e223b459f117e9ba32132460312fe5efbdde66ab576de1cc7fbc48ff4","first_computed_at":"2026-07-05T06:34:00.129385Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T06:34:00.129385Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9qZmYxooQ84Q1tRBsOzppGAvglR6v0S96MO5Sn2fRq+CUiuQeX7LO06nQfTL5wxYRmI1Pt3sSc4pAdx9c7mIBg==","signature_status":"signed_v1","signed_at":"2026-07-05T06:34:00.129800Z","signed_message":"canonical_sha256_bytes"},"source_id":"2307.12926","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20c8336ee7a8398ae3b3452d1e5661322a368b290ea85039e48a6810bb94a8dc","sha256:87050b2de5db136d61f7df3cc3ac94dfa48cd3155a66169cdd3ea330c2cb9778"],"state_sha256":"383ccc4c77ead889d799b867d089f3b0c6c565f292594d60e5cdf54db2a6d079"}