{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2010:WNWU57OLEMMF7YBUIPWCDFNMMZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"783df7fdbecfa0e76be3f1e799d0540d7c4cbecf5916746ea8025653a80a4d5b","cross_cats_sorted":["cs.LG","math.PR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2010-11-22T22:39:47Z","title_canon_sha256":"955149f1fff158e1407669b23015daf0200a36a450b5a71986c682e6f0c3a1e7"},"schema_version":"1.0","source":{"id":"1011.4969","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1011.4969","created_at":"2026-05-18T04:05:38Z"},{"alias_kind":"arxiv_version","alias_value":"1011.4969v2","created_at":"2026-05-18T04:05:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1011.4969","created_at":"2026-05-18T04:05:38Z"},{"alias_kind":"pith_short_12","alias_value":"WNWU57OLEMMF","created_at":"2026-05-18T12:26:17Z"},{"alias_kind":"pith_short_16","alias_value":"WNWU57OLEMMF7YBU","created_at":"2026-05-18T12:26:17Z"},{"alias_kind":"pith_short_8","alias_value":"WNWU57OL","created_at":"2026-05-18T12:26:17Z"}],"graph_snapshots":[{"event_id":"sha256:d439468b62fd6e5b82291171c18bee4036208dbf34302663ce9df6fcca728415","target":"graph","created_at":"2026-05-18T04:05:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the restless multi-armed bandit (RMAB) problem with unknown dynamics in which a player chooses M out of N arms to play at each time. The reward state of each arm transits according to an unknown Markovian rule when it is played and evolves according to an arbitrary unknown random process when it is passive. The performance of an arm selection policy is measured by regret, defined as the reward loss with respect to the case where the player knows which M arms are the most rewarding and always plays the M best arms. We construct a policy with an interleaving exploration and exploitat","authors_text":"Haoyang Liu, Keqin Liu, Qing Zhao","cross_cats":["cs.LG","math.PR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2010-11-22T22:39:47Z","title":"Learning in A Changing World: Restless Multi-Armed Bandit with Unknown Dynamics"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1011.4969","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1fd4ee060e54f5153471f443da1a0ea34e71088bbd71db3c638a3a2247df573a","target":"record","created_at":"2026-05-18T04:05:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"783df7fdbecfa0e76be3f1e799d0540d7c4cbecf5916746ea8025653a80a4d5b","cross_cats_sorted":["cs.LG","math.PR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2010-11-22T22:39:47Z","title_canon_sha256":"955149f1fff158e1407669b23015daf0200a36a450b5a71986c682e6f0c3a1e7"},"schema_version":"1.0","source":{"id":"1011.4969","kind":"arxiv","version":2}},"canonical_sha256":"b36d4efdcb23185fe03443ec2195ac6645efa6d09bfa665fdb0a7b8618c2a29c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b36d4efdcb23185fe03443ec2195ac6645efa6d09bfa665fdb0a7b8618c2a29c","first_computed_at":"2026-05-18T04:05:38.930662Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T04:05:38.930662Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gnko4t2XDQdBqjjIilDvuhAEHV0qRcvHPJcfNOufMxdrCHbBJDAncTq2tMmhykci36FkhbN8FPbyVaX3IWD5Dg==","signature_status":"signed_v1","signed_at":"2026-05-18T04:05:38.931169Z","signed_message":"canonical_sha256_bytes"},"source_id":"1011.4969","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1fd4ee060e54f5153471f443da1a0ea34e71088bbd71db3c638a3a2247df573a","sha256:d439468b62fd6e5b82291171c18bee4036208dbf34302663ce9df6fcca728415"],"state_sha256":"2bab8b05fb215ac3c64274111850bdb6f7f31024ffdb57515ba244c31f4b67fb"}