{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:AQ6Y2T4EC4IMWHVWVT4ZI6C52Y","short_pith_number":"pith:AQ6Y2T4E","canonical_record":{"source":{"id":"1411.5326","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2014-11-19T19:32:45Z","cross_cats_sorted":["cs.IT","math.IT"],"title_canon_sha256":"8c374aab4b8e9b16e379af11c662fb4865f073c508293589cc2cda4a95da3588","abstract_canon_sha256":"335c63856bacecd1348e042c292a0d2ce99bc52fd4cf8e92ee90fed54b7a80f5"},"schema_version":"1.0"},"canonical_sha256":"043d8d4f841710cb1eb6acf994785dd619c47bff03f115afeaceb30ecb55bebb","source":{"kind":"arxiv","id":"1411.5326","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1411.5326","created_at":"2026-05-18T02:34:39Z"},{"alias_kind":"arxiv_version","alias_value":"1411.5326v1","created_at":"2026-05-18T02:34:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1411.5326","created_at":"2026-05-18T02:34:39Z"},{"alias_kind":"pith_short_12","alias_value":"AQ6Y2T4EC4IM","created_at":"2026-05-18T12:28:19Z"},{"alias_kind":"pith_short_16","alias_value":"AQ6Y2T4EC4IMWHVW","created_at":"2026-05-18T12:28:19Z"},{"alias_kind":"pith_short_8","alias_value":"AQ6Y2T4E","created_at":"2026-05-18T12:28:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:AQ6Y2T4EC4IMWHVWVT4ZI6C52Y","target":"record","payload":{"canonical_record":{"source":{"id":"1411.5326","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2014-11-19T19:32:45Z","cross_cats_sorted":["cs.IT","math.IT"],"title_canon_sha256":"8c374aab4b8e9b16e379af11c662fb4865f073c508293589cc2cda4a95da3588","abstract_canon_sha256":"335c63856bacecd1348e042c292a0d2ce99bc52fd4cf8e92ee90fed54b7a80f5"},"schema_version":"1.0"},"canonical_sha256":"043d8d4f841710cb1eb6acf994785dd619c47bff03f115afeaceb30ecb55bebb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:34:39.230514Z","signature_b64":"sZ0J/k0KbxpB63rbZ5OiORawTygBoxB/2OtMvXOZ49k+PeIOKb/lQy+mpSgk1u25bjvAgSP+iF8r4UNMbEpDCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"043d8d4f841710cb1eb6acf994785dd619c47bff03f115afeaceb30ecb55bebb","last_reissued_at":"2026-05-18T02:34:39.229922Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:34:39.229922Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1411.5326","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:34:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"l6rfo1kBrsGYkUrxExB9H+5bDLJC7WTLfhkU/2+t1RuPDkeQjWfJGhj4GycNNjL17Z7O+mVN0F/WYTBp1ZLWBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:30:30.231688Z"},"content_sha256":"19ebcafdb35219c768a39ca913097783fbca1f1f4815b174f4da542b01431c0c","schema_version":"1.0","event_id":"sha256:19ebcafdb35219c768a39ca913097783fbca1f1f4815b174f4da542b01431c0c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:AQ6Y2T4EC4IMWHVWVT4ZI6C52Y","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Compress and Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IT","math.IT"],"primary_cat":"cs.AI","authors_text":"Alvin Chua, Guillaume Desjardins, Joel Veness, Marc G. Bellemare, Marcus Hutter","submitted_at":"2014-11-19T19:32:45Z","abstract_excerpt":"This paper describes a new information-theoretic policy evaluation technique for reinforcement learning. This technique converts any compression or density model into a corresponding estimate of value. Under appropriate stationarity and ergodicity conditions, we show that the use of a sufficiently powerful model gives rise to a consistent value function estimator. We also study the behavior of this technique when applied to various Atari 2600 video games, where the use of suboptimal modeling techniques is unavoidable. We consider three fundamentally different models, all too limited to perfect"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1411.5326","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:34:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CF/FtjZHS0a3rppB0uij+QuaFJWtPkJHXzMY39suecCtgsYKbbUArVBrMJxhvvY+iZd93ymMoXFPy0h4DAkNDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:30:30.232043Z"},"content_sha256":"6024ed033ab87bfa5695f8385b61b4ced1be6664fc8206d22390240d4b53937a","schema_version":"1.0","event_id":"sha256:6024ed033ab87bfa5695f8385b61b4ced1be6664fc8206d22390240d4b53937a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y/bundle.json","state_url":"https://pith.science/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T11:30:30Z","links":{"resolver":"https://pith.science/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y","bundle":"https://pith.science/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y/bundle.json","state":"https://pith.science/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AQ6Y2T4EC4IMWHVWVT4ZI6C52Y/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:AQ6Y2T4EC4IMWHVWVT4ZI6C52Y","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"335c63856bacecd1348e042c292a0d2ce99bc52fd4cf8e92ee90fed54b7a80f5","cross_cats_sorted":["cs.IT","math.IT"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2014-11-19T19:32:45Z","title_canon_sha256":"8c374aab4b8e9b16e379af11c662fb4865f073c508293589cc2cda4a95da3588"},"schema_version":"1.0","source":{"id":"1411.5326","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1411.5326","created_at":"2026-05-18T02:34:39Z"},{"alias_kind":"arxiv_version","alias_value":"1411.5326v1","created_at":"2026-05-18T02:34:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1411.5326","created_at":"2026-05-18T02:34:39Z"},{"alias_kind":"pith_short_12","alias_value":"AQ6Y2T4EC4IM","created_at":"2026-05-18T12:28:19Z"},{"alias_kind":"pith_short_16","alias_value":"AQ6Y2T4EC4IMWHVW","created_at":"2026-05-18T12:28:19Z"},{"alias_kind":"pith_short_8","alias_value":"AQ6Y2T4E","created_at":"2026-05-18T12:28:19Z"}],"graph_snapshots":[{"event_id":"sha256:6024ed033ab87bfa5695f8385b61b4ced1be6664fc8206d22390240d4b53937a","target":"graph","created_at":"2026-05-18T02:34:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper describes a new information-theoretic policy evaluation technique for reinforcement learning. This technique converts any compression or density model into a corresponding estimate of value. Under appropriate stationarity and ergodicity conditions, we show that the use of a sufficiently powerful model gives rise to a consistent value function estimator. We also study the behavior of this technique when applied to various Atari 2600 video games, where the use of suboptimal modeling techniques is unavoidable. We consider three fundamentally different models, all too limited to perfect","authors_text":"Alvin Chua, Guillaume Desjardins, Joel Veness, Marc G. Bellemare, Marcus Hutter","cross_cats":["cs.IT","math.IT"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2014-11-19T19:32:45Z","title":"Compress and Control"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1411.5326","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:19ebcafdb35219c768a39ca913097783fbca1f1f4815b174f4da542b01431c0c","target":"record","created_at":"2026-05-18T02:34:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"335c63856bacecd1348e042c292a0d2ce99bc52fd4cf8e92ee90fed54b7a80f5","cross_cats_sorted":["cs.IT","math.IT"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2014-11-19T19:32:45Z","title_canon_sha256":"8c374aab4b8e9b16e379af11c662fb4865f073c508293589cc2cda4a95da3588"},"schema_version":"1.0","source":{"id":"1411.5326","kind":"arxiv","version":1}},"canonical_sha256":"043d8d4f841710cb1eb6acf994785dd619c47bff03f115afeaceb30ecb55bebb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"043d8d4f841710cb1eb6acf994785dd619c47bff03f115afeaceb30ecb55bebb","first_computed_at":"2026-05-18T02:34:39.229922Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:34:39.229922Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sZ0J/k0KbxpB63rbZ5OiORawTygBoxB/2OtMvXOZ49k+PeIOKb/lQy+mpSgk1u25bjvAgSP+iF8r4UNMbEpDCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T02:34:39.230514Z","signed_message":"canonical_sha256_bytes"},"source_id":"1411.5326","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:19ebcafdb35219c768a39ca913097783fbca1f1f4815b174f4da542b01431c0c","sha256:6024ed033ab87bfa5695f8385b61b4ced1be6664fc8206d22390240d4b53937a"],"state_sha256":"967dbe8e66b939729e135c9e1e1dce5fe6fec1438fcfcf641cdb88b41f08cc75"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vkk3XlcjQF1fONC6xGPxIZb0UfTmySugQOHbb2TjNZSpMA4FTNJDz2oaa1aH2v5XDnp3kJg9YnmfIuzH4IEwCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T11:30:30.234014Z","bundle_sha256":"d256b7e0b7a95f11260eb1b2a8a735205d26cbddab7538a2ebd0eb251fc3b6f4"}}