{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2013:NYEUTGUEIEKXDSCA3Z4CRAOD7I","short_pith_number":"pith:NYEUTGUE","canonical_record":{"source":{"id":"1301.6690","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-01-23T15:57:38Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"b58270ff6e266ee4cf737f3d97ba79043a91f94de8bb7ba1fd06f8f011243b6f","abstract_canon_sha256":"1d5ff1108bd01334448f973075020263830dfcbbb013474052cbaafdcffe88bc"},"schema_version":"1.0"},"canonical_sha256":"6e09499a84411571c840de782881c3fa242248647f461da6814d3105d3115b0b","source":{"kind":"arxiv","id":"1301.6690","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1301.6690","created_at":"2026-05-18T03:35:12Z"},{"alias_kind":"arxiv_version","alias_value":"1301.6690v1","created_at":"2026-05-18T03:35:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1301.6690","created_at":"2026-05-18T03:35:12Z"},{"alias_kind":"pith_short_12","alias_value":"NYEUTGUEIEKX","created_at":"2026-05-18T12:27:52Z"},{"alias_kind":"pith_short_16","alias_value":"NYEUTGUEIEKXDSCA","created_at":"2026-05-18T12:27:52Z"},{"alias_kind":"pith_short_8","alias_value":"NYEUTGUE","created_at":"2026-05-18T12:27:52Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2013:NYEUTGUEIEKXDSCA3Z4CRAOD7I","target":"record","payload":{"canonical_record":{"source":{"id":"1301.6690","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-01-23T15:57:38Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"b58270ff6e266ee4cf737f3d97ba79043a91f94de8bb7ba1fd06f8f011243b6f","abstract_canon_sha256":"1d5ff1108bd01334448f973075020263830dfcbbb013474052cbaafdcffe88bc"},"schema_version":"1.0"},"canonical_sha256":"6e09499a84411571c840de782881c3fa242248647f461da6814d3105d3115b0b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:35:12.993193Z","signature_b64":"KxrQ2PTYghVC7V5q09mwW25GJOTv/z0abw/XC2ANxtHMa+fjj67YMiY8neDxCG8lfZ+AgmUftxuUFXO2IK8xCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6e09499a84411571c840de782881c3fa242248647f461da6814d3105d3115b0b","last_reissued_at":"2026-05-18T03:35:12.992502Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:35:12.992502Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1301.6690","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:35:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UE5e+ZrUgOcS9lK8f3fiPGGd6qwCVZSHFkOvR3V+8oaQRanaffz7UO/IPcAHsRZU0AxfflnX61fyY/JFYvuCDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:49:36.196821Z"},"content_sha256":"0dc01d08fdab9726835b4acd89b47346f8bc3969c6b13f72b34f52b3937937d2","schema_version":"1.0","event_id":"sha256:0dc01d08fdab9726835b4acd89b47346f8bc3969c6b13f72b34f52b3937937d2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2013:NYEUTGUEIEKXDSCA3Z4CRAOD7I","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Model-Based Bayesian Exploration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"David Andre, Nir Friedman, Richard Dearden","submitted_at":"2013-01-23T15:57:38Z","abstract_excerpt":"Reinforcement learning systems are often concerned with balancing exploration of untested actions against exploitation of actions that are known to be good. The benefit of exploration can be estimated using the classical notion of Value of Information - the expected improvement in future decision quality arising from the information acquired by exploration. Estimating this quantity requires an assessment of the agent's uncertainty about its current value estimates for states. In this paper we investigate ways of representing and reasoning about this uncertainty in algorithms where the system a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1301.6690","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:35:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"u4xyjHeh+BSA1yZnrrWXVbgLGpVW3CPPwec4YimCnnz/OciG/2W6kZmgxDS/HfU/J8K+TZmzOusvGj0isJtTBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:49:36.197526Z"},"content_sha256":"90318208b38c43d108a11d3d8fa88bc3e433c7c981597220633a3e2234c6185c","schema_version":"1.0","event_id":"sha256:90318208b38c43d108a11d3d8fa88bc3e433c7c981597220633a3e2234c6185c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I/bundle.json","state_url":"https://pith.science/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T17:49:36Z","links":{"resolver":"https://pith.science/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I","bundle":"https://pith.science/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I/bundle.json","state":"https://pith.science/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NYEUTGUEIEKXDSCA3Z4CRAOD7I/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2013:NYEUTGUEIEKXDSCA3Z4CRAOD7I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1d5ff1108bd01334448f973075020263830dfcbbb013474052cbaafdcffe88bc","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-01-23T15:57:38Z","title_canon_sha256":"b58270ff6e266ee4cf737f3d97ba79043a91f94de8bb7ba1fd06f8f011243b6f"},"schema_version":"1.0","source":{"id":"1301.6690","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1301.6690","created_at":"2026-05-18T03:35:12Z"},{"alias_kind":"arxiv_version","alias_value":"1301.6690v1","created_at":"2026-05-18T03:35:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1301.6690","created_at":"2026-05-18T03:35:12Z"},{"alias_kind":"pith_short_12","alias_value":"NYEUTGUEIEKX","created_at":"2026-05-18T12:27:52Z"},{"alias_kind":"pith_short_16","alias_value":"NYEUTGUEIEKXDSCA","created_at":"2026-05-18T12:27:52Z"},{"alias_kind":"pith_short_8","alias_value":"NYEUTGUE","created_at":"2026-05-18T12:27:52Z"}],"graph_snapshots":[{"event_id":"sha256:90318208b38c43d108a11d3d8fa88bc3e433c7c981597220633a3e2234c6185c","target":"graph","created_at":"2026-05-18T03:35:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning systems are often concerned with balancing exploration of untested actions against exploitation of actions that are known to be good. The benefit of exploration can be estimated using the classical notion of Value of Information - the expected improvement in future decision quality arising from the information acquired by exploration. Estimating this quantity requires an assessment of the agent's uncertainty about its current value estimates for states. In this paper we investigate ways of representing and reasoning about this uncertainty in algorithms where the system a","authors_text":"David Andre, Nir Friedman, Richard Dearden","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-01-23T15:57:38Z","title":"Model-Based Bayesian Exploration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1301.6690","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0dc01d08fdab9726835b4acd89b47346f8bc3969c6b13f72b34f52b3937937d2","target":"record","created_at":"2026-05-18T03:35:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1d5ff1108bd01334448f973075020263830dfcbbb013474052cbaafdcffe88bc","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-01-23T15:57:38Z","title_canon_sha256":"b58270ff6e266ee4cf737f3d97ba79043a91f94de8bb7ba1fd06f8f011243b6f"},"schema_version":"1.0","source":{"id":"1301.6690","kind":"arxiv","version":1}},"canonical_sha256":"6e09499a84411571c840de782881c3fa242248647f461da6814d3105d3115b0b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6e09499a84411571c840de782881c3fa242248647f461da6814d3105d3115b0b","first_computed_at":"2026-05-18T03:35:12.992502Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:35:12.992502Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KxrQ2PTYghVC7V5q09mwW25GJOTv/z0abw/XC2ANxtHMa+fjj67YMiY8neDxCG8lfZ+AgmUftxuUFXO2IK8xCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:35:12.993193Z","signed_message":"canonical_sha256_bytes"},"source_id":"1301.6690","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0dc01d08fdab9726835b4acd89b47346f8bc3969c6b13f72b34f52b3937937d2","sha256:90318208b38c43d108a11d3d8fa88bc3e433c7c981597220633a3e2234c6185c"],"state_sha256":"f7541a15ec1114684ff9a58488113f780b890a6596815526c8c990b9c268b1b7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mhbffMrDkEHHOU63avegSaMvbzlh4GsdHXSWw3MZoV8L2t0aO/AjOzRhG08THABn5LqWg1OixTwrowb9SjyNCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T17:49:36.201537Z","bundle_sha256":"60e01a5df29ff01fa7942a332af39e46147e1f1745a05ac388c334461dc84648"}}