{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2013:YFCN7YBAVJG6GEFYQAV5YELC7A","short_pith_number":"pith:YFCN7YBA","canonical_record":{"source":{"id":"1303.3163","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-03-13T14:06:21Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"e45dc72d0f8023e621595ed4be2f94532c9a8352a604d989332a8415a3f518a6","abstract_canon_sha256":"00abbf44100a346f8d328813a5281dd643623275f8e11af1e62da931d13ac860"},"schema_version":"1.0"},"canonical_sha256":"c144dfe020aa4de310b8802bdc1162f80a91113f0ed5054068ec2a02b65785f0","source":{"kind":"arxiv","id":"1303.3163","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1303.3163","created_at":"2026-05-18T03:21:00Z"},{"alias_kind":"arxiv_version","alias_value":"1303.3163v3","created_at":"2026-05-18T03:21:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1303.3163","created_at":"2026-05-18T03:21:00Z"},{"alias_kind":"pith_short_12","alias_value":"YFCN7YBAVJG6","created_at":"2026-05-18T12:28:06Z"},{"alias_kind":"pith_short_16","alias_value":"YFCN7YBAVJG6GEFY","created_at":"2026-05-18T12:28:06Z"},{"alias_kind":"pith_short_8","alias_value":"YFCN7YBA","created_at":"2026-05-18T12:28:06Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2013:YFCN7YBAVJG6GEFYQAV5YELC7A","target":"record","payload":{"canonical_record":{"source":{"id":"1303.3163","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-03-13T14:06:21Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"e45dc72d0f8023e621595ed4be2f94532c9a8352a604d989332a8415a3f518a6","abstract_canon_sha256":"00abbf44100a346f8d328813a5281dd643623275f8e11af1e62da931d13ac860"},"schema_version":"1.0"},"canonical_sha256":"c144dfe020aa4de310b8802bdc1162f80a91113f0ed5054068ec2a02b65785f0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:21:00.400460Z","signature_b64":"3XFeF6+o7FmrZ8xXCkCxs00J1Vho6dowyqOr2LyZPFwmGXM3JeUOnd/HYAN06BIeW+cOKGqGgwtItMdw+os9BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c144dfe020aa4de310b8802bdc1162f80a91113f0ed5054068ec2a02b65785f0","last_reissued_at":"2026-05-18T03:21:00.399758Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:21:00.399758Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1303.3163","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:21:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pleFcMca8LWtHX1MJDHVx0oC0peRdtYlg8kjnDGPhzZ98I4Rk2MOkptbIww8JPjR7EyJZopTOkRDOH9SKVo1DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T16:59:44.190701Z"},"content_sha256":"44789da7e87addffffa7be3619b052fa36fa80ed4a93c08ec13a1e110cb9f091","schema_version":"1.0","event_id":"sha256:44789da7e87addffffa7be3619b052fa36fa80ed4a93c08ec13a1e110cb9f091"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2013:YFCN7YBAVJG6GEFYQAV5YELC7A","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Greedy Approximation of Bayesian Reinforcement Learning with Probably Optimistic Transition Model","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.AI","authors_text":"Kenji Kawaguchi, Mauricio Araya","submitted_at":"2013-03-13T14:06:21Z","abstract_excerpt":"Bayesian Reinforcement Learning (RL) is capable of not only incorporating domain knowledge, but also solving the exploration-exploitation dilemma in a natural way. As Bayesian RL is intractable except for special cases, previous work has proposed several approximation methods. However, these methods are usually too sensitive to parameter values, and finding an acceptable parameter setting is practically impossible in many applications. In this paper, we propose a new algorithm that greedily approximates Bayesian RL to achieve robustness in parameter space. We show that for a desired learning b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1303.3163","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:21:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PRGPEI7EGVf05zNHYGvGboCTvcfuHpUPyQiFb3n0MtAPaFqlXZpVqitKz4q2uEHokTHufTpll0mowLiQ4suWAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T16:59:44.191483Z"},"content_sha256":"8f135775848f54b3dcbac90c3925a5d50209fc81b5fa8da701aae4ee4ef22b22","schema_version":"1.0","event_id":"sha256:8f135775848f54b3dcbac90c3925a5d50209fc81b5fa8da701aae4ee4ef22b22"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YFCN7YBAVJG6GEFYQAV5YELC7A/bundle.json","state_url":"https://pith.science/pith/YFCN7YBAVJG6GEFYQAV5YELC7A/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YFCN7YBAVJG6GEFYQAV5YELC7A/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T16:59:44Z","links":{"resolver":"https://pith.science/pith/YFCN7YBAVJG6GEFYQAV5YELC7A","bundle":"https://pith.science/pith/YFCN7YBAVJG6GEFYQAV5YELC7A/bundle.json","state":"https://pith.science/pith/YFCN7YBAVJG6GEFYQAV5YELC7A/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YFCN7YBAVJG6GEFYQAV5YELC7A/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2013:YFCN7YBAVJG6GEFYQAV5YELC7A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"00abbf44100a346f8d328813a5281dd643623275f8e11af1e62da931d13ac860","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-03-13T14:06:21Z","title_canon_sha256":"e45dc72d0f8023e621595ed4be2f94532c9a8352a604d989332a8415a3f518a6"},"schema_version":"1.0","source":{"id":"1303.3163","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1303.3163","created_at":"2026-05-18T03:21:00Z"},{"alias_kind":"arxiv_version","alias_value":"1303.3163v3","created_at":"2026-05-18T03:21:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1303.3163","created_at":"2026-05-18T03:21:00Z"},{"alias_kind":"pith_short_12","alias_value":"YFCN7YBAVJG6","created_at":"2026-05-18T12:28:06Z"},{"alias_kind":"pith_short_16","alias_value":"YFCN7YBAVJG6GEFY","created_at":"2026-05-18T12:28:06Z"},{"alias_kind":"pith_short_8","alias_value":"YFCN7YBA","created_at":"2026-05-18T12:28:06Z"}],"graph_snapshots":[{"event_id":"sha256:8f135775848f54b3dcbac90c3925a5d50209fc81b5fa8da701aae4ee4ef22b22","target":"graph","created_at":"2026-05-18T03:21:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Bayesian Reinforcement Learning (RL) is capable of not only incorporating domain knowledge, but also solving the exploration-exploitation dilemma in a natural way. As Bayesian RL is intractable except for special cases, previous work has proposed several approximation methods. However, these methods are usually too sensitive to parameter values, and finding an acceptable parameter setting is practically impossible in many applications. In this paper, we propose a new algorithm that greedily approximates Bayesian RL to achieve robustness in parameter space. We show that for a desired learning b","authors_text":"Kenji Kawaguchi, Mauricio Araya","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-03-13T14:06:21Z","title":"A Greedy Approximation of Bayesian Reinforcement Learning with Probably Optimistic Transition Model"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1303.3163","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:44789da7e87addffffa7be3619b052fa36fa80ed4a93c08ec13a1e110cb9f091","target":"record","created_at":"2026-05-18T03:21:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"00abbf44100a346f8d328813a5281dd643623275f8e11af1e62da931d13ac860","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-03-13T14:06:21Z","title_canon_sha256":"e45dc72d0f8023e621595ed4be2f94532c9a8352a604d989332a8415a3f518a6"},"schema_version":"1.0","source":{"id":"1303.3163","kind":"arxiv","version":3}},"canonical_sha256":"c144dfe020aa4de310b8802bdc1162f80a91113f0ed5054068ec2a02b65785f0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c144dfe020aa4de310b8802bdc1162f80a91113f0ed5054068ec2a02b65785f0","first_computed_at":"2026-05-18T03:21:00.399758Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:21:00.399758Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3XFeF6+o7FmrZ8xXCkCxs00J1Vho6dowyqOr2LyZPFwmGXM3JeUOnd/HYAN06BIeW+cOKGqGgwtItMdw+os9BQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:21:00.400460Z","signed_message":"canonical_sha256_bytes"},"source_id":"1303.3163","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:44789da7e87addffffa7be3619b052fa36fa80ed4a93c08ec13a1e110cb9f091","sha256:8f135775848f54b3dcbac90c3925a5d50209fc81b5fa8da701aae4ee4ef22b22"],"state_sha256":"4900a173c23820d6f4ab6b9f6a3a76ad85c3fda54d7849e86f2a1ee0a96d8783"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ahagIvH2oOlaUD53fxqrMWBPVDk0krlVv/qn2iIINr3uDQ1DIvEWXC8AGnpwA0QW6G+VAKwH7ID9535WaVf3Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T16:59:44.195424Z","bundle_sha256":"fa321cc089059a353a001e682eb9d23ae826ea7054cc1f66a0f8bae68293c729"}}