{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:OQLCKNVRR2E743BNGMBKYYQPBQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b443fc7fcc02a3491f40dcf3710a2f02ba56467abf166eb3803547675b65bd7b","cross_cats_sorted":["cs.LG","cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.SY","submitted_at":"2019-06-21T09:57:50Z","title_canon_sha256":"8101a090bdd4f7899ad97e6a97e188d72f4d670dfa78c235ffe6624f940e9595"},"schema_version":"1.0","source":{"id":"1906.09035","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.09035","created_at":"2026-05-17T23:42:44Z"},{"alias_kind":"arxiv_version","alias_value":"1906.09035v1","created_at":"2026-05-17T23:42:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.09035","created_at":"2026-05-17T23:42:44Z"},{"alias_kind":"pith_short_12","alias_value":"OQLCKNVRR2E7","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"OQLCKNVRR2E743BN","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"OQLCKNVR","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:078e1527e607a6452ea59f83b9ae0a50688bf2a34ba1c7a492b5eea80056b9cd","target":"graph","created_at":"2026-05-17T23:42:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Stochastic control with both inherent random system noise and lack of knowledge on system parameters constitutes the core and fundamental topic in reinforcement learning (RL), especially under non-episodic situations where online learning is much more demanding. This challenge has been notably addressed in Bayesian RL recently where some approximation techniques have been developed to find suboptimal policies. While existing approaches mainly focus on approximating the value function, or on involving Thompson sampling, we propose a novel two-layer solution scheme in this paper to approximate t","authors_text":"Daniel Zhuoyu Long, Duan Li, Xin Huang","cross_cats":["cs.LG","cs.SY"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.SY","submitted_at":"2019-06-21T09:57:50Z","title":"Revised Progressive-Hedging-Algorithm Based Two-layer Solution Scheme for Bayesian Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.09035","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b0338eab4f791a05f815c5651daa2705ec1b9b47a54a46e839cf3dfdebb48c17","target":"record","created_at":"2026-05-17T23:42:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b443fc7fcc02a3491f40dcf3710a2f02ba56467abf166eb3803547675b65bd7b","cross_cats_sorted":["cs.LG","cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.SY","submitted_at":"2019-06-21T09:57:50Z","title_canon_sha256":"8101a090bdd4f7899ad97e6a97e188d72f4d670dfa78c235ffe6624f940e9595"},"schema_version":"1.0","source":{"id":"1906.09035","kind":"arxiv","version":1}},"canonical_sha256":"74162536b18e89fe6c2d3302ac620f0c24358578c09e89f5ffb4624228311850","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"74162536b18e89fe6c2d3302ac620f0c24358578c09e89f5ffb4624228311850","first_computed_at":"2026-05-17T23:42:44.760943Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:42:44.760943Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ygf7ds6D93qYPS8nC98Fkm0UZXNMCCPqCiZv6Hx//ZSys+aQPOAZ8WkQbrcpeCCSxfV843CDK4+HdmVRsyLmBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:42:44.761700Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.09035","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b0338eab4f791a05f815c5651daa2705ec1b9b47a54a46e839cf3dfdebb48c17","sha256:078e1527e607a6452ea59f83b9ae0a50688bf2a34ba1c7a492b5eea80056b9cd"],"state_sha256":"e47beb9661ac53a8923b72f9b82669d3b2f35ea6b94920c7ce181bbcab64c52a"}