{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:SOEEMBE22WRGYTAWLJJGOFRIDX","short_pith_number":"pith:SOEEMBE2","canonical_record":{"source":{"id":"1811.02597","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-06T19:09:04Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"d002b72aa900476cc897c6e7fc1c55a8d18670034cba681dc46a31a93a500208","abstract_canon_sha256":"a1d9e943fc0050f41a57b62becedad8d6b25c835e9432efad048e0977e3af76a"},"schema_version":"1.0"},"canonical_sha256":"938846049ad5a26c4c165a526716281dc981155b48ed0c166b3179f90a06d194","source":{"kind":"arxiv","id":"1811.02597","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.02597","created_at":"2026-05-18T00:01:21Z"},{"alias_kind":"arxiv_version","alias_value":"1811.02597v1","created_at":"2026-05-18T00:01:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.02597","created_at":"2026-05-18T00:01:21Z"},{"alias_kind":"pith_short_12","alias_value":"SOEEMBE22WRG","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"SOEEMBE22WRGYTAW","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"SOEEMBE2","created_at":"2026-05-18T12:32:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:SOEEMBE22WRGYTAWLJJGOFRIDX","target":"record","payload":{"canonical_record":{"source":{"id":"1811.02597","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-06T19:09:04Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"d002b72aa900476cc897c6e7fc1c55a8d18670034cba681dc46a31a93a500208","abstract_canon_sha256":"a1d9e943fc0050f41a57b62becedad8d6b25c835e9432efad048e0977e3af76a"},"schema_version":"1.0"},"canonical_sha256":"938846049ad5a26c4c165a526716281dc981155b48ed0c166b3179f90a06d194","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:21.295190Z","signature_b64":"751TyZduSWF3xvLkssAiQ/yRkCMGAVChW2N2GXei/6GmHdSJCJn8HS4R4JHKJJnsHvuvt/SjGhJ9gJtCyhISCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"938846049ad5a26c4c165a526716281dc981155b48ed0c166b3179f90a06d194","last_reissued_at":"2026-05-18T00:01:21.294624Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:21.294624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.02597","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0ZWHL23xa0zGoH8uBi/xvE1r1DNrntlD0xxcvWpxpeZbfGsVG8SwN8cIXH6sanV7fsGXsmvaVZfm/xaRdpggCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T18:56:14.401192Z"},"content_sha256":"1875487adaa9d86b5cc8da84a4b2145205dc12bbb6d66422c836f2cf0d042eeb","schema_version":"1.0","event_id":"sha256:1875487adaa9d86b5cc8da84a4b2145205dc12bbb6d66422c836f2cf0d042eeb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:SOEEMBE22WRGYTAWLJJGOFRIDX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Online Off-policy Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Adam White, Andrew Patterson, Martha White, Richard S. Sutton, Sina Ghiassian","submitted_at":"2018-11-06T19:09:04Z","abstract_excerpt":"This paper investigates the problem of online prediction learning, where learning proceeds continuously as the agent interacts with an environment. The predictions made by the agent are contingent on a particular way of behaving, represented as a value function. However, the behavior used to select actions and generate the behavior data might be different from the one used to define the predictions, and thus the samples are generated off-policy. The ability to learn behavior-contingent predictions online and off-policy has long been advocated as a key capability of predictive-knowledge learnin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.02597","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"U2Q6UhAz6jI2r0UdPWAVVcM2itcGgceJksUekZ+3fZfkTnfmKfug3qPbDmEoouoJmzjX0ugFu4UHWiQlwe6vBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T18:56:14.401839Z"},"content_sha256":"6243013290b012e8d0c366a56f20cced1bbab231dabf29bb51eac65c17293c08","schema_version":"1.0","event_id":"sha256:6243013290b012e8d0c366a56f20cced1bbab231dabf29bb51eac65c17293c08"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SOEEMBE22WRGYTAWLJJGOFRIDX/bundle.json","state_url":"https://pith.science/pith/SOEEMBE22WRGYTAWLJJGOFRIDX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SOEEMBE22WRGYTAWLJJGOFRIDX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T18:56:14Z","links":{"resolver":"https://pith.science/pith/SOEEMBE22WRGYTAWLJJGOFRIDX","bundle":"https://pith.science/pith/SOEEMBE22WRGYTAWLJJGOFRIDX/bundle.json","state":"https://pith.science/pith/SOEEMBE22WRGYTAWLJJGOFRIDX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SOEEMBE22WRGYTAWLJJGOFRIDX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:SOEEMBE22WRGYTAWLJJGOFRIDX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a1d9e943fc0050f41a57b62becedad8d6b25c835e9432efad048e0977e3af76a","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-06T19:09:04Z","title_canon_sha256":"d002b72aa900476cc897c6e7fc1c55a8d18670034cba681dc46a31a93a500208"},"schema_version":"1.0","source":{"id":"1811.02597","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.02597","created_at":"2026-05-18T00:01:21Z"},{"alias_kind":"arxiv_version","alias_value":"1811.02597v1","created_at":"2026-05-18T00:01:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.02597","created_at":"2026-05-18T00:01:21Z"},{"alias_kind":"pith_short_12","alias_value":"SOEEMBE22WRG","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"SOEEMBE22WRGYTAW","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"SOEEMBE2","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:6243013290b012e8d0c366a56f20cced1bbab231dabf29bb51eac65c17293c08","target":"graph","created_at":"2026-05-18T00:01:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper investigates the problem of online prediction learning, where learning proceeds continuously as the agent interacts with an environment. The predictions made by the agent are contingent on a particular way of behaving, represented as a value function. However, the behavior used to select actions and generate the behavior data might be different from the one used to define the predictions, and thus the samples are generated off-policy. The ability to learn behavior-contingent predictions online and off-policy has long been advocated as a key capability of predictive-knowledge learnin","authors_text":"Adam White, Andrew Patterson, Martha White, Richard S. Sutton, Sina Ghiassian","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-06T19:09:04Z","title":"Online Off-policy Prediction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.02597","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1875487adaa9d86b5cc8da84a4b2145205dc12bbb6d66422c836f2cf0d042eeb","target":"record","created_at":"2026-05-18T00:01:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a1d9e943fc0050f41a57b62becedad8d6b25c835e9432efad048e0977e3af76a","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-06T19:09:04Z","title_canon_sha256":"d002b72aa900476cc897c6e7fc1c55a8d18670034cba681dc46a31a93a500208"},"schema_version":"1.0","source":{"id":"1811.02597","kind":"arxiv","version":1}},"canonical_sha256":"938846049ad5a26c4c165a526716281dc981155b48ed0c166b3179f90a06d194","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"938846049ad5a26c4c165a526716281dc981155b48ed0c166b3179f90a06d194","first_computed_at":"2026-05-18T00:01:21.294624Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:21.294624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"751TyZduSWF3xvLkssAiQ/yRkCMGAVChW2N2GXei/6GmHdSJCJn8HS4R4JHKJJnsHvuvt/SjGhJ9gJtCyhISCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:21.295190Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.02597","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1875487adaa9d86b5cc8da84a4b2145205dc12bbb6d66422c836f2cf0d042eeb","sha256:6243013290b012e8d0c366a56f20cced1bbab231dabf29bb51eac65c17293c08"],"state_sha256":"619368bdee78d97dbf5d9605bacc4e8cfed0cc20cb3588a89553bebc40c1be87"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"M0ToLq2Ea4ugEf4r83fcwJABHo5AZg2zjwJ3PDPC1MeoafPt60f5MFpA+6yeEdMpvWBlmR29/SxrW9lfhDTsCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T18:56:14.405446Z","bundle_sha256":"660a8ecb2a9ee85d72361cc9e4f84a8c20cb37a78e542f955e9d7bf6020ee67f"}}