{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:WBEMYJDQX4HJ5G5GGNV74LEWEA","short_pith_number":"pith:WBEMYJDQ","canonical_record":{"source":{"id":"1206.3281","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:43:32Z","cross_cats_sorted":[],"title_canon_sha256":"ae74b03bb2e3193dafd06b4470e8f3d02ea36dd4c47a7d65c2b721b8954a0926","abstract_canon_sha256":"d650911061ffe9997adc124cc337fa7495e8248da502995f869770370a0c30b6"},"schema_version":"1.0"},"canonical_sha256":"b048cc2470bf0e9e9ba6336bfe2c962031708e85b09579655d40fb5848c07a2e","source":{"kind":"arxiv","id":"1206.3281","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.3281","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"arxiv_version","alias_value":"1206.3281v1","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.3281","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"pith_short_12","alias_value":"WBEMYJDQX4HJ","created_at":"2026-05-18T12:27:25Z"},{"alias_kind":"pith_short_16","alias_value":"WBEMYJDQX4HJ5G5G","created_at":"2026-05-18T12:27:25Z"},{"alias_kind":"pith_short_8","alias_value":"WBEMYJDQ","created_at":"2026-05-18T12:27:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:WBEMYJDQX4HJ5G5GGNV74LEWEA","target":"record","payload":{"canonical_record":{"source":{"id":"1206.3281","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:43:32Z","cross_cats_sorted":[],"title_canon_sha256":"ae74b03bb2e3193dafd06b4470e8f3d02ea36dd4c47a7d65c2b721b8954a0926","abstract_canon_sha256":"d650911061ffe9997adc124cc337fa7495e8248da502995f869770370a0c30b6"},"schema_version":"1.0"},"canonical_sha256":"b048cc2470bf0e9e9ba6336bfe2c962031708e85b09579655d40fb5848c07a2e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:53:30.782369Z","signature_b64":"2qGtmr+7qhKQ/YO75/iw9oP2EEGYTo6ggw/Lj2fD1oh2BftNj9Ow3XOiMRPOWkCCaTIzqBQW8N1bC3GmD+twDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b048cc2470bf0e9e9ba6336bfe2c962031708e85b09579655d40fb5848c07a2e","last_reissued_at":"2026-05-18T03:53:30.781407Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:53:30.781407Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1206.3281","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:53:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BJG+T0OgOykg/WnsdOrjmXQs3k+9xP1GOHEiFJ+iDA63LXE43VXKfqcBPZBclBMuoXCWiS9KPVY/ikrrEjL1Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T20:20:49.501279Z"},"content_sha256":"1d6ace97c63c9b43876b0fe108300109a90ed8d79c559c8579f380288f0e471d","schema_version":"1.0","event_id":"sha256:1d6ace97c63c9b43876b0fe108300109a90ed8d79c559c8579f380288f0e471d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:WBEMYJDQX4HJ5G5GGNV74LEWEA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Model-Based Bayesian Reinforcement Learning in Large Structured Domains","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Joelle Pineau, Stephane Ross","submitted_at":"2012-06-13T15:43:32Z","abstract_excerpt":"Model-based Bayesian reinforcement learning has generated significant interest in the AI community as it provides an elegant solution to the optimal exploration-exploitation tradeoff in classical reinforcement learning. Unfortunately, the applicability of this type of approach has been limited to small domains due to the high complexity of reasoning about the joint posterior over model parameters. In this paper, we consider the use of factored representations combined with online planning techniques, to improve scalability of these methods. The main contribution of this paper is a Bayesian fra"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.3281","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:53:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xMKxSuS6RlcKsoN28YByS4otKls2MknReT9SkTOgPANiNf/lCrUfTQUwM9phPbtBGEMeNPnfdhUacn2/I8bvDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T20:20:49.501660Z"},"content_sha256":"a12fd7ef04e18e466460fc74ac944737e500b2f940addb51b14e1eff277e1285","schema_version":"1.0","event_id":"sha256:a12fd7ef04e18e466460fc74ac944737e500b2f940addb51b14e1eff277e1285"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA/bundle.json","state_url":"https://pith.science/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T20:20:49Z","links":{"resolver":"https://pith.science/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA","bundle":"https://pith.science/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA/bundle.json","state":"https://pith.science/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WBEMYJDQX4HJ5G5GGNV74LEWEA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:WBEMYJDQX4HJ5G5GGNV74LEWEA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d650911061ffe9997adc124cc337fa7495e8248da502995f869770370a0c30b6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:43:32Z","title_canon_sha256":"ae74b03bb2e3193dafd06b4470e8f3d02ea36dd4c47a7d65c2b721b8954a0926"},"schema_version":"1.0","source":{"id":"1206.3281","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.3281","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"arxiv_version","alias_value":"1206.3281v1","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.3281","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"pith_short_12","alias_value":"WBEMYJDQX4HJ","created_at":"2026-05-18T12:27:25Z"},{"alias_kind":"pith_short_16","alias_value":"WBEMYJDQX4HJ5G5G","created_at":"2026-05-18T12:27:25Z"},{"alias_kind":"pith_short_8","alias_value":"WBEMYJDQ","created_at":"2026-05-18T12:27:25Z"}],"graph_snapshots":[{"event_id":"sha256:a12fd7ef04e18e466460fc74ac944737e500b2f940addb51b14e1eff277e1285","target":"graph","created_at":"2026-05-18T03:53:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Model-based Bayesian reinforcement learning has generated significant interest in the AI community as it provides an elegant solution to the optimal exploration-exploitation tradeoff in classical reinforcement learning. Unfortunately, the applicability of this type of approach has been limited to small domains due to the high complexity of reasoning about the joint posterior over model parameters. In this paper, we consider the use of factored representations combined with online planning techniques, to improve scalability of these methods. The main contribution of this paper is a Bayesian fra","authors_text":"Joelle Pineau, Stephane Ross","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:43:32Z","title":"Model-Based Bayesian Reinforcement Learning in Large Structured Domains"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.3281","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1d6ace97c63c9b43876b0fe108300109a90ed8d79c559c8579f380288f0e471d","target":"record","created_at":"2026-05-18T03:53:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d650911061ffe9997adc124cc337fa7495e8248da502995f869770370a0c30b6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-13T15:43:32Z","title_canon_sha256":"ae74b03bb2e3193dafd06b4470e8f3d02ea36dd4c47a7d65c2b721b8954a0926"},"schema_version":"1.0","source":{"id":"1206.3281","kind":"arxiv","version":1}},"canonical_sha256":"b048cc2470bf0e9e9ba6336bfe2c962031708e85b09579655d40fb5848c07a2e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b048cc2470bf0e9e9ba6336bfe2c962031708e85b09579655d40fb5848c07a2e","first_computed_at":"2026-05-18T03:53:30.781407Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:53:30.781407Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2qGtmr+7qhKQ/YO75/iw9oP2EEGYTo6ggw/Lj2fD1oh2BftNj9Ow3XOiMRPOWkCCaTIzqBQW8N1bC3GmD+twDA==","signature_status":"signed_v1","signed_at":"2026-05-18T03:53:30.782369Z","signed_message":"canonical_sha256_bytes"},"source_id":"1206.3281","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1d6ace97c63c9b43876b0fe108300109a90ed8d79c559c8579f380288f0e471d","sha256:a12fd7ef04e18e466460fc74ac944737e500b2f940addb51b14e1eff277e1285"],"state_sha256":"d8d75d7e336a85ce0895e194904a86d494b42a8595fef48cf405b079be1f1b5c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J52+/M/hHrr5knIZXN/pr47UlbLJLtUn6pOKyWw3KTomYza1klMCUfaiJef/LhliL6uVQy0rpgrT3GI/L5G1Dw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T20:20:49.504900Z","bundle_sha256":"b0989e0090263cac6bb47c0de3126bf6d44f1f83ba872924e4881e8daaef0dd1"}}