{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:XSQP44HVJIPECV22SZYVIVDH4U","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f8add97b79f599f11b93a576ceed0204c4f30074f9e5928e50ed9185bc024afc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-11T18:44:50Z","title_canon_sha256":"6438c37fb396d65e53c1434987d26fa9f501eb851b5bc39897cfbb22c893c064"},"schema_version":"1.0","source":{"id":"1803.04008","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.04008","created_at":"2026-05-17T23:52:18Z"},{"alias_kind":"arxiv_version","alias_value":"1803.04008v2","created_at":"2026-05-17T23:52:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.04008","created_at":"2026-05-17T23:52:18Z"},{"alias_kind":"pith_short_12","alias_value":"XSQP44HVJIPE","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"XSQP44HVJIPECV22","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"XSQP44HV","created_at":"2026-05-18T12:33:01Z"}],"graph_snapshots":[{"event_id":"sha256:12ffbbbe78e124d6e32be825a55a6ffca0c8a6fdb5bcf659b4391c936f0a1d7b","target":"graph","created_at":"2026-05-17T23:52:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We study a multi-armed bandit problem in a dynamic environment where arm rewards evolve in a correlated fashion according to a Markov chain. Different than much of the work on related problems, in our formulation a learning algorithm does not have access to either a priori information or observations of the state of the Markov chain and only observes smoothed reward feedback following time intervals we refer to as epochs. We demonstrate that existing methods such as UCB and $\\varepsilon$-greedy can suffer linear regret in such an environment. Employing mixing-time bounds on Markov chains, we d","authors_text":"Lillian J. Ratliff, Shreyas Sekar, Tanner Fiez","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-11T18:44:50Z","title":"Multi-Armed Bandits for Correlated Markovian Environments with Smoothed Reward Feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.04008","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7be36b4ce1fae4cd8ce961c04408e2f461a8ad0aceb094605583ccb6a7aaa58e","target":"record","created_at":"2026-05-17T23:52:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f8add97b79f599f11b93a576ceed0204c4f30074f9e5928e50ed9185bc024afc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-11T18:44:50Z","title_canon_sha256":"6438c37fb396d65e53c1434987d26fa9f501eb851b5bc39897cfbb22c893c064"},"schema_version":"1.0","source":{"id":"1803.04008","kind":"arxiv","version":2}},"canonical_sha256":"bca0fe70f54a1e41575a9671545467e52503fbe106be2767b5677c5f970d255e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bca0fe70f54a1e41575a9671545467e52503fbe106be2767b5677c5f970d255e","first_computed_at":"2026-05-17T23:52:18.501916Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:52:18.501916Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Kn43TZvRG5Tkp9Q6QlSqK9QcrvlzRBjZK3zrmPoaMB6CMUP3p/Xg6tPNATzghqexkSh9Skg/HbLnueGYnDLfDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:52:18.502584Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.04008","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7be36b4ce1fae4cd8ce961c04408e2f461a8ad0aceb094605583ccb6a7aaa58e","sha256:12ffbbbe78e124d6e32be825a55a6ffca0c8a6fdb5bcf659b4391c936f0a1d7b"],"state_sha256":"6f400d18352a59f30f0c32f02031aaa29506057c0270da54315032bb7c394e28"}