{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:HT2VVHDD7YSAP5KJWGSPXQQPDJ","short_pith_number":"pith:HT2VVHDD","canonical_record":{"source":{"id":"1705.10557","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","cross_cats_sorted":[],"title_canon_sha256":"2dd962f0c61eea312d092d05c6d22ead85184e19fed2d382ab990576a8cbed6b","abstract_canon_sha256":"528791cd8168d30971c3eba5213a69611292cea8d2bd66515fcd4eaf4d13a1cc"},"schema_version":"1.0"},"canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","source":{"kind":"arxiv","id":"1705.10557","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.10557","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"arxiv_version","alias_value":"1705.10557v1","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.10557","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"pith_short_12","alias_value":"HT2VVHDD7YSA","created_at":"2026-05-18T12:31:18Z"},{"alias_kind":"pith_short_16","alias_value":"HT2VVHDD7YSAP5KJ","created_at":"2026-05-18T12:31:18Z"},{"alias_kind":"pith_short_8","alias_value":"HT2VVHDD","created_at":"2026-05-18T12:31:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:HT2VVHDD7YSAP5KJWGSPXQQPDJ","target":"record","payload":{"canonical_record":{"source":{"id":"1705.10557","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","cross_cats_sorted":[],"title_canon_sha256":"2dd962f0c61eea312d092d05c6d22ead85184e19fed2d382ab990576a8cbed6b","abstract_canon_sha256":"528791cd8168d30971c3eba5213a69611292cea8d2bd66515fcd4eaf4d13a1cc"},"schema_version":"1.0"},"canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:43:24.258963Z","signature_b64":"dp9XhT/C8018MrATMrvTW2CS+EV42j0Bd/qsRDIXp1xq7GfydjmS+WRREDtco43LBlEP08ZpxJyEa8g77wMzBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","last_reissued_at":"2026-05-18T00:43:24.258285Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:43:24.258285Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1705.10557","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vjDC+JdBOwrmz/ceV1TuSgqU1f0YjD+fm75hWtrSchavgeJ86i/9IuZ6Awdu9Am91jH1CoFTV+ElvPLROCDcBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T19:14:05.416926Z"},"content_sha256":"20944d7c288b3350e014a25b6350deda4fdc86fbadaa3898218a3f5780dd8896","schema_version":"1.0","event_id":"sha256:20944d7c288b3350e014a25b6350deda4fdc86fbadaa3898218a3f5780dd8896"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:HT2VVHDD7YSAP5KJWGSPXQQPDJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Universal Reinforcement Learning Algorithms: Survey and Experiments","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Jan Leike, John Aslanides, Marcus Hutter","submitted_at":"2017-05-30T11:41:00Z","abstract_excerpt":"Many state-of-the-art reinforcement learning (RL) algorithms typically assume that the environment is an ergodic Markov Decision Process (MDP). In contrast, the field of universal reinforcement learning (URL) is concerned with algorithms that make as few assumptions as possible about the environment. The universal Bayesian agent AIXI and a family of related URL algorithms have been developed in this setting. While numerous theoretical optimality results have been proven for these agents, there has been no empirical investigation of their behavior to date. We present a short and accessible surv"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.10557","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wsnL7ytld0QB4aymnv2S9O2VlkVuGTdT8mmE/mSJmenrlyYkphTt3NTJNlY1eSMgrDGIsSyUX1A4x0kBsTAfDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T19:14:05.417604Z"},"content_sha256":"9878f7c58f13a577e9f0a143c2c4f33dee46f778891434c3e3da1b39174d82e9","schema_version":"1.0","event_id":"sha256:9878f7c58f13a577e9f0a143c2c4f33dee46f778891434c3e3da1b39174d82e9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ/bundle.json","state_url":"https://pith.science/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T19:14:05Z","links":{"resolver":"https://pith.science/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ","bundle":"https://pith.science/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ/bundle.json","state":"https://pith.science/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HT2VVHDD7YSAP5KJWGSPXQQPDJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:HT2VVHDD7YSAP5KJWGSPXQQPDJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"528791cd8168d30971c3eba5213a69611292cea8d2bd66515fcd4eaf4d13a1cc","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","title_canon_sha256":"2dd962f0c61eea312d092d05c6d22ead85184e19fed2d382ab990576a8cbed6b"},"schema_version":"1.0","source":{"id":"1705.10557","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.10557","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"arxiv_version","alias_value":"1705.10557v1","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.10557","created_at":"2026-05-18T00:43:24Z"},{"alias_kind":"pith_short_12","alias_value":"HT2VVHDD7YSA","created_at":"2026-05-18T12:31:18Z"},{"alias_kind":"pith_short_16","alias_value":"HT2VVHDD7YSAP5KJ","created_at":"2026-05-18T12:31:18Z"},{"alias_kind":"pith_short_8","alias_value":"HT2VVHDD","created_at":"2026-05-18T12:31:18Z"}],"graph_snapshots":[{"event_id":"sha256:9878f7c58f13a577e9f0a143c2c4f33dee46f778891434c3e3da1b39174d82e9","target":"graph","created_at":"2026-05-18T00:43:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many state-of-the-art reinforcement learning (RL) algorithms typically assume that the environment is an ergodic Markov Decision Process (MDP). In contrast, the field of universal reinforcement learning (URL) is concerned with algorithms that make as few assumptions as possible about the environment. The universal Bayesian agent AIXI and a family of related URL algorithms have been developed in this setting. While numerous theoretical optimality results have been proven for these agents, there has been no empirical investigation of their behavior to date. We present a short and accessible surv","authors_text":"Jan Leike, John Aslanides, Marcus Hutter","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","title":"Universal Reinforcement Learning Algorithms: Survey and Experiments"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.10557","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20944d7c288b3350e014a25b6350deda4fdc86fbadaa3898218a3f5780dd8896","target":"record","created_at":"2026-05-18T00:43:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"528791cd8168d30971c3eba5213a69611292cea8d2bd66515fcd4eaf4d13a1cc","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2017-05-30T11:41:00Z","title_canon_sha256":"2dd962f0c61eea312d092d05c6d22ead85184e19fed2d382ab990576a8cbed6b"},"schema_version":"1.0","source":{"id":"1705.10557","kind":"arxiv","version":1}},"canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3cf55a9c63fe2407f549b1a4fbc20f1a4d3e2f95302e921b4e82635bc20d47de","first_computed_at":"2026-05-18T00:43:24.258285Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:43:24.258285Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dp9XhT/C8018MrATMrvTW2CS+EV42j0Bd/qsRDIXp1xq7GfydjmS+WRREDtco43LBlEP08ZpxJyEa8g77wMzBA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:43:24.258963Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.10557","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20944d7c288b3350e014a25b6350deda4fdc86fbadaa3898218a3f5780dd8896","sha256:9878f7c58f13a577e9f0a143c2c4f33dee46f778891434c3e3da1b39174d82e9"],"state_sha256":"8102dee61dc225c5f3eba26b269b1811ce5266045791b9afc871b3f41b43809d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PZm7E9ec4AomrC3ru0Wxv6Gte9+vovAds3Ofe04L+ns+bhLgsKtCz/RMmCUJrxcXOzY3hN+HArJU5IobNXgMAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T19:14:05.421660Z","bundle_sha256":"61bf91b1705adf89b25fe3b24b55b3d8515704030a345535024e74fad04e6fb0"}}