{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:XH3VJZEAIN4M5HZFOZPDYYPGAR","short_pith_number":"pith:XH3VJZEA","canonical_record":{"source":{"id":"1810.00468","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-30T21:12:44Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ac9f502d78924fbf8e8f25c9132b75cb609e374edbb14cbbd604a129edf6290c","abstract_canon_sha256":"1d973934b0d4835e06b134280f62a3461ad6f7a90cb8d15d3319152f0cdaa87c"},"schema_version":"1.0"},"canonical_sha256":"b9f754e4804378ce9f25765e3c61e604785e67673ea39148342884dd44637f49","source":{"kind":"arxiv","id":"1810.00468","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.00468","created_at":"2026-05-18T00:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"1810.00468v1","created_at":"2026-05-18T00:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.00468","created_at":"2026-05-18T00:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"XH3VJZEAIN4M","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"XH3VJZEAIN4M5HZF","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"XH3VJZEA","created_at":"2026-05-18T12:33:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:XH3VJZEAIN4M5HZFOZPDYYPGAR","target":"record","payload":{"canonical_record":{"source":{"id":"1810.00468","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-30T21:12:44Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ac9f502d78924fbf8e8f25c9132b75cb609e374edbb14cbbd604a129edf6290c","abstract_canon_sha256":"1d973934b0d4835e06b134280f62a3461ad6f7a90cb8d15d3319152f0cdaa87c"},"schema_version":"1.0"},"canonical_sha256":"b9f754e4804378ce9f25765e3c61e604785e67673ea39148342884dd44637f49","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:04:26.077626Z","signature_b64":"I5fxS/I36tDcvqV+AWpaScDVCTVT9D0/GDzxF3rbKAflNNxTSmvb2fyyJndO1LzkTRSe679VZDoDwuQobaDKAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b9f754e4804378ce9f25765e3c61e604785e67673ea39148342884dd44637f49","last_reissued_at":"2026-05-18T00:04:26.077139Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:04:26.077139Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1810.00468","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:04:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3oK22YozjF2xXUo4Ldqtto4u2WUtvDdfThAPZvdlJ5b7hgbZA6TlZYjUjYg7nWgmMIcw0Mq7oqZnVTGQxv9mDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T15:02:21.461632Z"},"content_sha256":"164e182b079bc8d4afe3736d806a55a3aafe08aef5ea686a5a42fcb61c595074","schema_version":"1.0","event_id":"sha256:164e182b079bc8d4afe3736d806a55a3aafe08aef5ea686a5a42fcb61c595074"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:XH3VJZEAIN4M5HZFOZPDYYPGAR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bayesian Transfer Reinforcement Learning with Prior Knowledge Rules","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Michalis K. Titsias, Sotirios Nikoloutsopoulos","submitted_at":"2018-09-30T21:12:44Z","abstract_excerpt":"We propose a probabilistic framework to directly insert prior knowledge in reinforcement learning (RL) algorithms by defining the behaviour policy as a Bayesian posterior distribution. Such a posterior combines task specific information with prior knowledge, thus allowing to achieve transfer learning across tasks. The resulting method is flexible and it can be easily incorporated to any standard off-policy and on-policy algorithms, such as those based on temporal differences and policy gradients. We develop a specific instance of this Bayesian transfer RL framework by expressing prior knowledg"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.00468","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:04:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Sz33CayOCD2/L0BNuKEhOgwbsqbhd59TwJPQPOJoTpmtiSiDDXqPHWc14yDihm5LLItSqRjOzyB9qCyJtZDdCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T15:02:21.461981Z"},"content_sha256":"64710b9fcdc94e645cbdeab8827a4f90ff056c2fb92902eeac4ef9b355434e32","schema_version":"1.0","event_id":"sha256:64710b9fcdc94e645cbdeab8827a4f90ff056c2fb92902eeac4ef9b355434e32"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR/bundle.json","state_url":"https://pith.science/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T15:02:21Z","links":{"resolver":"https://pith.science/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR","bundle":"https://pith.science/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR/bundle.json","state":"https://pith.science/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XH3VJZEAIN4M5HZFOZPDYYPGAR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:XH3VJZEAIN4M5HZFOZPDYYPGAR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1d973934b0d4835e06b134280f62a3461ad6f7a90cb8d15d3319152f0cdaa87c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-30T21:12:44Z","title_canon_sha256":"ac9f502d78924fbf8e8f25c9132b75cb609e374edbb14cbbd604a129edf6290c"},"schema_version":"1.0","source":{"id":"1810.00468","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.00468","created_at":"2026-05-18T00:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"1810.00468v1","created_at":"2026-05-18T00:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.00468","created_at":"2026-05-18T00:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"XH3VJZEAIN4M","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"XH3VJZEAIN4M5HZF","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"XH3VJZEA","created_at":"2026-05-18T12:33:01Z"}],"graph_snapshots":[{"event_id":"sha256:64710b9fcdc94e645cbdeab8827a4f90ff056c2fb92902eeac4ef9b355434e32","target":"graph","created_at":"2026-05-18T00:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose a probabilistic framework to directly insert prior knowledge in reinforcement learning (RL) algorithms by defining the behaviour policy as a Bayesian posterior distribution. Such a posterior combines task specific information with prior knowledge, thus allowing to achieve transfer learning across tasks. The resulting method is flexible and it can be easily incorporated to any standard off-policy and on-policy algorithms, such as those based on temporal differences and policy gradients. We develop a specific instance of this Bayesian transfer RL framework by expressing prior knowledg","authors_text":"Michalis K. Titsias, Sotirios Nikoloutsopoulos","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-30T21:12:44Z","title":"Bayesian Transfer Reinforcement Learning with Prior Knowledge Rules"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.00468","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:164e182b079bc8d4afe3736d806a55a3aafe08aef5ea686a5a42fcb61c595074","target":"record","created_at":"2026-05-18T00:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1d973934b0d4835e06b134280f62a3461ad6f7a90cb8d15d3319152f0cdaa87c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-30T21:12:44Z","title_canon_sha256":"ac9f502d78924fbf8e8f25c9132b75cb609e374edbb14cbbd604a129edf6290c"},"schema_version":"1.0","source":{"id":"1810.00468","kind":"arxiv","version":1}},"canonical_sha256":"b9f754e4804378ce9f25765e3c61e604785e67673ea39148342884dd44637f49","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b9f754e4804378ce9f25765e3c61e604785e67673ea39148342884dd44637f49","first_computed_at":"2026-05-18T00:04:26.077139Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:04:26.077139Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"I5fxS/I36tDcvqV+AWpaScDVCTVT9D0/GDzxF3rbKAflNNxTSmvb2fyyJndO1LzkTRSe679VZDoDwuQobaDKAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:04:26.077626Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.00468","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:164e182b079bc8d4afe3736d806a55a3aafe08aef5ea686a5a42fcb61c595074","sha256:64710b9fcdc94e645cbdeab8827a4f90ff056c2fb92902eeac4ef9b355434e32"],"state_sha256":"d05e536fe44149908b37206a5b0ffbad1d1680c1f86a00f42501cc8ea311d726"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gBkZGIWhC5KUJMgIzyPCeZ3big2Yv58+x3M2IgC+7PjdzL72tt0tPVcbJ2kvG7mGFtMKpSCIey9LoHG1vgaOAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T15:02:21.463971Z","bundle_sha256":"3a49da5fbe2c4b79fdf4c83478c9c6c3d9baf4e3350e91dd2323c14f1affee7c"}}