{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:HT2VVHDD7YSAP5KJWGSPXQQPDJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"528791cd8168d30971c3eba5213a69611292cea8d2bd66515fcd4eaf4d13a1cc","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","title_canon_sha256":"2dd962f0c61eea312d092d05c6d22ead85184e19fed2d382ab990576a8cbed6b"},"schema_version":"1.0","source":{"id":"1705.10557","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.10557","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"arxiv_version","alias_value":"1705.10557v1","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.10557","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"pith_short_12","alias_value":"HT2VVHDD7YSA","created_at":"2026-05-18T12:31:18Z"},{"alias_kind":"pith_short_16","alias_value":"HT2VVHDD7YSAP5KJ","created_at":"2026-05-18T12:31:18Z"},{"alias_kind":"pith_short_8","alias_value":"HT2VVHDD","created_at":"2026-05-18T12:31:18Z"}],"graph_snapshots":[{"event_id":"sha256:9878f7c58f13a577e9f0a143c2c4f33dee46f778891434c3e3da1b39174d82e9","target":"graph","created_at":"2026-05-18T00:43:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many state-of-the-art reinforcement learning (RL) algorithms typically assume that the environment is an ergodic Markov Decision Process (MDP). In contrast, the field of universal reinforcement learning (URL) is concerned with algorithms that make as few assumptions as possible about the environment. The universal Bayesian agent AIXI and a family of related URL algorithms have been developed in this setting. While numerous theoretical optimality results have been proven for these agents, there has been no empirical investigation of their behavior to date. We present a short and accessible surv","authors_text":"Jan Leike, John Aslanides, Marcus Hutter","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","title":"Universal Reinforcement Learning Algorithms: Survey and Experiments"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.10557","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20944d7c288b3350e014a25b6350deda4fdc86fbadaa3898218a3f5780dd8896","target":"record","created_at":"2026-05-18T00:43:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"528791cd8168d30971c3eba5213a69611292cea8d2bd66515fcd4eaf4d13a1cc","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","title_canon_sha256":"2dd962f0c61eea312d092d05c6d22ead85184e19fed2d382ab990576a8cbed6b"},"schema_version":"1.0","source":{"id":"1705.10557","kind":"arxiv","version":1}},"canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","first_computed_at":"2026-05-18T00:43:24.258285Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:43:24.258285Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dp9XhT/C8018MrATMrvTW2CS+EV42j0Bd/qsRDIXp1xq7GfydjmS+WRREDtco43LBlEP08ZpxJyEa8g77wMzBA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:43:24.258963Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.10557","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20944d7c288b3350e014a25b6350deda4fdc86fbadaa3898218a3f5780dd8896","sha256:9878f7c58f13a577e9f0a143c2c4f33dee46f778891434c3e3da1b39174d82e9"],"state_sha256":"8102dee61dc225c5f3eba26b269b1811ce5266045791b9afc871b3f41b43809d"}