{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:VEUYHKKF5CW4OG6I6F6RHP7IIM","short_pith_number":"pith:VEUYHKKF","canonical_record":{"source":{"id":"1806.02426","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-06T21:09:39Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"90e35c23333dfcc822e324481a2166ea4ac2e446a3724d7467372e8a7e2610fe","abstract_canon_sha256":"325e0ee5a8e3e6548cfaec032bc28aaead251dd38f87adc29dcdadcbc0b0c600"},"schema_version":"1.0"},"canonical_sha256":"a92983a945e8adc71bc8f17d13bfe843206c9122cf60f6994cea242e255f566d","source":{"kind":"arxiv","id":"1806.02426","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.02426","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"arxiv_version","alias_value":"1806.02426v1","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.02426","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"pith_short_12","alias_value":"VEUYHKKF5CW4","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_16","alias_value":"VEUYHKKF5CW4OG6I","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_8","alias_value":"VEUYHKKF","created_at":"2026-05-18T12:32:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:VEUYHKKF5CW4OG6I6F6RHP7IIM","target":"record","payload":{"canonical_record":{"source":{"id":"1806.02426","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-06T21:09:39Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"90e35c23333dfcc822e324481a2166ea4ac2e446a3724d7467372e8a7e2610fe","abstract_canon_sha256":"325e0ee5a8e3e6548cfaec032bc28aaead251dd38f87adc29dcdadcbc0b0c600"},"schema_version":"1.0"},"canonical_sha256":"a92983a945e8adc71bc8f17d13bfe843206c9122cf60f6994cea242e255f566d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:56.863563Z","signature_b64":"dP806tJEaOa/5KBYy85r0CqskbeP3OcGC7S2q4iJU7LSDcnIQz6JMceUF7G+mA3kKeZh54Bbn6KfqLPyRm0LCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a92983a945e8adc71bc8f17d13bfe843206c9122cf60f6994cea242e255f566d","last_reissued_at":"2026-05-18T00:13:56.862840Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:56.862840Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.02426","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0IXu3hCXJw38D1ZnBH0WwOBAGuND0mWpjRg6SqaFXnF075vH3IBuius/8mY5Q2JFpDj3Kq3WV8GJSF/kPiLeDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T00:56:40.425815Z"},"content_sha256":"340c79fbf5df538c421a0e1a6357b4ce4e54efa2eef4ef47c944b24df87e4a39","schema_version":"1.0","event_id":"sha256:340c79fbf5df538c421a0e1a6357b4ce4e54efa2eef4ef47c944b24df87e4a39"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:VEUYHKKF5CW4OG6I6F6RHP7IIM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Deep Variational Reinforcement Learning for POMDPs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Frank Wood, Luisa Zintgraf, Maximilian Igl, Shimon Whiteson, Tuan Anh Le","submitted_at":"2018-06-06T21:09:39Z","abstract_excerpt":"Many real-world sequential decision making problems are partially observable by nature, and the environment model is typically unknown. Consequently, there is great need for reinforcement learning methods that can tackle such problems given only a stream of incomplete and noisy observations. In this paper, we propose deep variational reinforcement learning (DVRL), which introduces an inductive bias that allows an agent to learn a generative model of the environment and perform inference in that model to effectively aggregate the available information. We develop an n-step approximation to the "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.02426","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JtcUullhBuWKeklNC5WXzIBrrahPKYSgsRORiaJXXwdLdeksyM+XKlPzsR2QdoRZ6iXvfvFX/JRspGQbKJDkCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T00:56:40.426167Z"},"content_sha256":"f3655381f1cdd6e92d4913d4de46df29be4b7e0e25f2c5f649a0dbfe8ce4873e","schema_version":"1.0","event_id":"sha256:f3655381f1cdd6e92d4913d4de46df29be4b7e0e25f2c5f649a0dbfe8ce4873e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM/bundle.json","state_url":"https://pith.science/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T00:56:40Z","links":{"resolver":"https://pith.science/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM","bundle":"https://pith.science/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM/bundle.json","state":"https://pith.science/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VEUYHKKF5CW4OG6I6F6RHP7IIM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:VEUYHKKF5CW4OG6I6F6RHP7IIM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"325e0ee5a8e3e6548cfaec032bc28aaead251dd38f87adc29dcdadcbc0b0c600","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-06T21:09:39Z","title_canon_sha256":"90e35c23333dfcc822e324481a2166ea4ac2e446a3724d7467372e8a7e2610fe"},"schema_version":"1.0","source":{"id":"1806.02426","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.02426","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"arxiv_version","alias_value":"1806.02426v1","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.02426","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"pith_short_12","alias_value":"VEUYHKKF5CW4","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_16","alias_value":"VEUYHKKF5CW4OG6I","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_8","alias_value":"VEUYHKKF","created_at":"2026-05-18T12:32:59Z"}],"graph_snapshots":[{"event_id":"sha256:f3655381f1cdd6e92d4913d4de46df29be4b7e0e25f2c5f649a0dbfe8ce4873e","target":"graph","created_at":"2026-05-18T00:13:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many real-world sequential decision making problems are partially observable by nature, and the environment model is typically unknown. Consequently, there is great need for reinforcement learning methods that can tackle such problems given only a stream of incomplete and noisy observations. In this paper, we propose deep variational reinforcement learning (DVRL), which introduces an inductive bias that allows an agent to learn a generative model of the environment and perform inference in that model to effectively aggregate the available information. We develop an n-step approximation to the ","authors_text":"Frank Wood, Luisa Zintgraf, Maximilian Igl, Shimon Whiteson, Tuan Anh Le","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-06T21:09:39Z","title":"Deep Variational Reinforcement Learning for POMDPs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.02426","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:340c79fbf5df538c421a0e1a6357b4ce4e54efa2eef4ef47c944b24df87e4a39","target":"record","created_at":"2026-05-18T00:13:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"325e0ee5a8e3e6548cfaec032bc28aaead251dd38f87adc29dcdadcbc0b0c600","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-06T21:09:39Z","title_canon_sha256":"90e35c23333dfcc822e324481a2166ea4ac2e446a3724d7467372e8a7e2610fe"},"schema_version":"1.0","source":{"id":"1806.02426","kind":"arxiv","version":1}},"canonical_sha256":"a92983a945e8adc71bc8f17d13bfe843206c9122cf60f6994cea242e255f566d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a92983a945e8adc71bc8f17d13bfe843206c9122cf60f6994cea242e255f566d","first_computed_at":"2026-05-18T00:13:56.862840Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:56.862840Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dP806tJEaOa/5KBYy85r0CqskbeP3OcGC7S2q4iJU7LSDcnIQz6JMceUF7G+mA3kKeZh54Bbn6KfqLPyRm0LCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:56.863563Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.02426","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:340c79fbf5df538c421a0e1a6357b4ce4e54efa2eef4ef47c944b24df87e4a39","sha256:f3655381f1cdd6e92d4913d4de46df29be4b7e0e25f2c5f649a0dbfe8ce4873e"],"state_sha256":"f439ca01f9a4ed016d007f44b37c9531983e85b3b47871d83eb0002cfac9016a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zzljfg/o+rK1LE3Etui8lnXhuH+m78g4Vo47lj7MbZ6OyYe0rvJksCeEnOsn5X0sX6RErOgEc2geiF4Zk5XhBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T00:56:40.428189Z","bundle_sha256":"74b9860ed03d51d3ea69aacdd50d46ac6aef91150896b4f9cd9c0412c6983306"}}