{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:UOKW5GGPRT3H6UKMQR6QMM4ZT7","short_pith_number":"pith:UOKW5GGP","canonical_record":{"source":{"id":"1805.08052","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-21T13:44:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"110631dc59382c218817c1d8a1e07936eb3bed7001507e9f001e94da50ff17d6","abstract_canon_sha256":"772d02ce65bbd8ab1e95a660b37fd2325556f4684ec8fd8a29fb35ad9fa04021"},"schema_version":"1.0"},"canonical_sha256":"a3956e98cf8cf67f514c847d0633999ff736a778f69cfc9b26144561cb060f70","source":{"kind":"arxiv","id":"1805.08052","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.08052","created_at":"2026-05-17T23:57:02Z"},{"alias_kind":"arxiv_version","alias_value":"1805.08052v2","created_at":"2026-05-17T23:57:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.08052","created_at":"2026-05-17T23:57:02Z"},{"alias_kind":"pith_short_12","alias_value":"UOKW5GGPRT3H","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"UOKW5GGPRT3H6UKM","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"UOKW5GGP","created_at":"2026-05-18T12:32:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:UOKW5GGPRT3H6UKMQR6QMM4ZT7","target":"record","payload":{"canonical_record":{"source":{"id":"1805.08052","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-21T13:44:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"110631dc59382c218817c1d8a1e07936eb3bed7001507e9f001e94da50ff17d6","abstract_canon_sha256":"772d02ce65bbd8ab1e95a660b37fd2325556f4684ec8fd8a29fb35ad9fa04021"},"schema_version":"1.0"},"canonical_sha256":"a3956e98cf8cf67f514c847d0633999ff736a778f69cfc9b26144561cb060f70","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:02.926598Z","signature_b64":"0WD1O3FIR9daii43BUft52YqYsEv1z19WvxBXB3daQ6W0ccHOL3QoDHGnbabEPRuQ8i8Ghkcu2z5TWzWWG59Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a3956e98cf8cf67f514c847d0633999ff736a778f69cfc9b26144561cb060f70","last_reissued_at":"2026-05-17T23:57:02.925996Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:02.925996Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.08052","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:57:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EbiJ8/8tuTMX4HWktFPw4UAREHpsULnA+jTnfKlsAiaYd9jma7yLFCMu52QvgrPyo2eQ1k+zSJLGFsUnltltBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:42:02.612246Z"},"content_sha256":"20144db9daf4e0eef932cd92937ccd4d2fc62a137e302e80cef06cb374f0bcb7","schema_version":"1.0","event_id":"sha256:20144db9daf4e0eef932cd92937ccd4d2fc62a137e302e80cef06cb374f0bcb7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:UOKW5GGPRT3H6UKMQR6QMM4ZT7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Online Learning in Kernelized Markov Decision Processes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Aditya Gopalan, Sayak Ray Chowdhury","submitted_at":"2018-05-21T13:44:10Z","abstract_excerpt":"We consider online learning for minimizing regret in unknown, episodic Markov decision processes (MDPs) with continuous states and actions. We develop variants of the UCRL and posterior sampling algorithms that employ nonparametric Gaussian process priors to generalize across the state and action spaces. When the transition and reward functions of the true MDP are members of the associated Reproducing Kernel Hilbert Spaces of functions induced by symmetric psd kernels (frequentist setting), we show that the algorithms enjoy sublinear regret bounds. The bounds are in terms of explicit structura"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.08052","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:57:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WTdm5EhyHZGGcffHSZyEqmkAvbZWEQtKxpPM8ktz40fSLiczJvyVjPujspcD2BR++bvoVPSbxAE/4QhE7iX5AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:42:02.612984Z"},"content_sha256":"0f4136f36320726ecea5edf14735a2111bb7ca1fa1796e3a4a4b0e9e11c44001","schema_version":"1.0","event_id":"sha256:0f4136f36320726ecea5edf14735a2111bb7ca1fa1796e3a4a4b0e9e11c44001"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7/bundle.json","state_url":"https://pith.science/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T23:42:02Z","links":{"resolver":"https://pith.science/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7","bundle":"https://pith.science/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7/bundle.json","state":"https://pith.science/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UOKW5GGPRT3H6UKMQR6QMM4ZT7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:UOKW5GGPRT3H6UKMQR6QMM4ZT7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"772d02ce65bbd8ab1e95a660b37fd2325556f4684ec8fd8a29fb35ad9fa04021","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-21T13:44:10Z","title_canon_sha256":"110631dc59382c218817c1d8a1e07936eb3bed7001507e9f001e94da50ff17d6"},"schema_version":"1.0","source":{"id":"1805.08052","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.08052","created_at":"2026-05-17T23:57:02Z"},{"alias_kind":"arxiv_version","alias_value":"1805.08052v2","created_at":"2026-05-17T23:57:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.08052","created_at":"2026-05-17T23:57:02Z"},{"alias_kind":"pith_short_12","alias_value":"UOKW5GGPRT3H","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"UOKW5GGPRT3H6UKM","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"UOKW5GGP","created_at":"2026-05-18T12:32:56Z"}],"graph_snapshots":[{"event_id":"sha256:0f4136f36320726ecea5edf14735a2111bb7ca1fa1796e3a4a4b0e9e11c44001","target":"graph","created_at":"2026-05-17T23:57:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider online learning for minimizing regret in unknown, episodic Markov decision processes (MDPs) with continuous states and actions. We develop variants of the UCRL and posterior sampling algorithms that employ nonparametric Gaussian process priors to generalize across the state and action spaces. When the transition and reward functions of the true MDP are members of the associated Reproducing Kernel Hilbert Spaces of functions induced by symmetric psd kernels (frequentist setting), we show that the algorithms enjoy sublinear regret bounds. The bounds are in terms of explicit structura","authors_text":"Aditya Gopalan, Sayak Ray Chowdhury","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-21T13:44:10Z","title":"Online Learning in Kernelized Markov Decision Processes"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.08052","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20144db9daf4e0eef932cd92937ccd4d2fc62a137e302e80cef06cb374f0bcb7","target":"record","created_at":"2026-05-17T23:57:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"772d02ce65bbd8ab1e95a660b37fd2325556f4684ec8fd8a29fb35ad9fa04021","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-21T13:44:10Z","title_canon_sha256":"110631dc59382c218817c1d8a1e07936eb3bed7001507e9f001e94da50ff17d6"},"schema_version":"1.0","source":{"id":"1805.08052","kind":"arxiv","version":2}},"canonical_sha256":"a3956e98cf8cf67f514c847d0633999ff736a778f69cfc9b26144561cb060f70","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a3956e98cf8cf67f514c847d0633999ff736a778f69cfc9b26144561cb060f70","first_computed_at":"2026-05-17T23:57:02.925996Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:57:02.925996Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0WD1O3FIR9daii43BUft52YqYsEv1z19WvxBXB3daQ6W0ccHOL3QoDHGnbabEPRuQ8i8Ghkcu2z5TWzWWG59Cg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:57:02.926598Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.08052","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20144db9daf4e0eef932cd92937ccd4d2fc62a137e302e80cef06cb374f0bcb7","sha256:0f4136f36320726ecea5edf14735a2111bb7ca1fa1796e3a4a4b0e9e11c44001"],"state_sha256":"eb7d575b761ca07aff40fa69947dd3149648a31e56ae407547b6e323521cb6d4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wpXZCJeXvInYAQW6AScGk6EOpSNVHrji7CVpZp6XSI/7l/4VN2wiGP9MiNMqeW1uQlB/XieCKkKN/nh4cEIXAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T23:42:02.616894Z","bundle_sha256":"be960b3949b9b4b67d17a4f21dd8deba656f939ace44200765ec2228ceb4a15b"}}