{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:AEGEVRCFLNDAXNMASPGH5VHRLH","short_pith_number":"pith:AEGEVRCF","canonical_record":{"source":{"id":"1901.07860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-23T13:09:31Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"bf8a54921e0a0fec5bdd9ded700cf80ca38612c57111c905deeea3b377219b63","abstract_canon_sha256":"1261c06cc5ee551424e44c16fdf3e71b904902d632fd8b5ece469b139ec75db1"},"schema_version":"1.0"},"canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","source":{"kind":"arxiv","id":"1901.07860","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.07860","created_at":"2026-05-17T23:55:39Z"},{"alias_kind":"arxiv_version","alias_value":"1901.07860v1","created_at":"2026-05-17T23:55:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.07860","created_at":"2026-05-17T23:55:39Z"},{"alias_kind":"pith_short_12","alias_value":"AEGEVRCFLNDA","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"AEGEVRCFLNDAXNMA","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"AEGEVRCF","created_at":"2026-05-18T12:33:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:AEGEVRCFLNDAXNMASPGH5VHRLH","target":"record","payload":{"canonical_record":{"source":{"id":"1901.07860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-23T13:09:31Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"bf8a54921e0a0fec5bdd9ded700cf80ca38612c57111c905deeea3b377219b63","abstract_canon_sha256":"1261c06cc5ee551424e44c16fdf3e71b904902d632fd8b5ece469b139ec75db1"},"schema_version":"1.0"},"canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:39.589985Z","signature_b64":"qiaBMfhEu6JhyUbyAsN10CxuodhwEJFhYkq5XYiKnNoeXxJdRB8g/QwZegmXTU/JUS6T/mdiu26a0N+JF0iLAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","last_reissued_at":"2026-05-17T23:55:39.589607Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:39.589607Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.07860","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:55:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XlwIT3ZFPYpJEokYxLduUKwVqmZyUkzh+7lS1pB0vlQpAuO/NCoNWdOCad+cfiUhj0hxnxYrkN2Di0qKLcWxBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T03:37:14.598403Z"},"content_sha256":"b9233023f488dd8225b7ab2829c8cd6e33cd6b4aa6a0e27aad83d166c693cd1a","schema_version":"1.0","event_id":"sha256:b9233023f488dd8225b7ab2829c8cd6e33cd6b4aa6a0e27aad83d166c693cd1a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:AEGEVRCFLNDAXNMASPGH5VHRLH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Trust Region Value Optimization using Kalman Filtering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Shie Mannor, Shirli Di-Castro Shashua","submitted_at":"2019-01-23T13:09:31Z","abstract_excerpt":"Policy evaluation is a key process in reinforcement learning. It assesses a given policy using estimation of the corresponding value function. When using a parameterized function to approximate the value, it is common to optimize the set of parameters by minimizing the sum of squared Bellman Temporal Differences errors. However, this approach ignores certain distributional properties of both the errors and value parameters. Taking these distributions into account in the optimization process can provide useful information on the amount of confidence in value estimation. In this work we propose "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.07860","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:55:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3uqKXE/E2hPhkaDjrt2m/XsUg7w7tjrWdl3OcSJhMfi4EfYO52QbhuH6ftwckdQfzKXNDnKkNiRth/1bqguQCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T03:37:14.598754Z"},"content_sha256":"d9efc64cebb5c6fa71270adac435b6c4195bc1ade0e221111f04805766efa583","schema_version":"1.0","event_id":"sha256:d9efc64cebb5c6fa71270adac435b6c4195bc1ade0e221111f04805766efa583"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/bundle.json","state_url":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T03:37:14Z","links":{"resolver":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH","bundle":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/bundle.json","state":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:AEGEVRCFLNDAXNMASPGH5VHRLH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1261c06cc5ee551424e44c16fdf3e71b904902d632fd8b5ece469b139ec75db1","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-23T13:09:31Z","title_canon_sha256":"bf8a54921e0a0fec5bdd9ded700cf80ca38612c57111c905deeea3b377219b63"},"schema_version":"1.0","source":{"id":"1901.07860","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.07860","created_at":"2026-05-17T23:55:39Z"},{"alias_kind":"arxiv_version","alias_value":"1901.07860v1","created_at":"2026-05-17T23:55:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.07860","created_at":"2026-05-17T23:55:39Z"},{"alias_kind":"pith_short_12","alias_value":"AEGEVRCFLNDA","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"AEGEVRCFLNDAXNMA","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"AEGEVRCF","created_at":"2026-05-18T12:33:12Z"}],"graph_snapshots":[{"event_id":"sha256:d9efc64cebb5c6fa71270adac435b6c4195bc1ade0e221111f04805766efa583","target":"graph","created_at":"2026-05-17T23:55:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Policy evaluation is a key process in reinforcement learning. It assesses a given policy using estimation of the corresponding value function. When using a parameterized function to approximate the value, it is common to optimize the set of parameters by minimizing the sum of squared Bellman Temporal Differences errors. However, this approach ignores certain distributional properties of both the errors and value parameters. Taking these distributions into account in the optimization process can provide useful information on the amount of confidence in value estimation. In this work we propose ","authors_text":"Shie Mannor, Shirli Di-Castro Shashua","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-23T13:09:31Z","title":"Trust Region Value Optimization using Kalman Filtering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.07860","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b9233023f488dd8225b7ab2829c8cd6e33cd6b4aa6a0e27aad83d166c693cd1a","target":"record","created_at":"2026-05-17T23:55:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1261c06cc5ee551424e44c16fdf3e71b904902d632fd8b5ece469b139ec75db1","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-23T13:09:31Z","title_canon_sha256":"bf8a54921e0a0fec5bdd9ded700cf80ca38612c57111c905deeea3b377219b63"},"schema_version":"1.0","source":{"id":"1901.07860","kind":"arxiv","version":1}},"canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","first_computed_at":"2026-05-17T23:55:39.589607Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:55:39.589607Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qiaBMfhEu6JhyUbyAsN10CxuodhwEJFhYkq5XYiKnNoeXxJdRB8g/QwZegmXTU/JUS6T/mdiu26a0N+JF0iLAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:55:39.589985Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.07860","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b9233023f488dd8225b7ab2829c8cd6e33cd6b4aa6a0e27aad83d166c693cd1a","sha256:d9efc64cebb5c6fa71270adac435b6c4195bc1ade0e221111f04805766efa583"],"state_sha256":"13f539030d2c014f7fdb5f11a00c3b95ba0d71a125ff691299e20724a959b61f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"O6pRUBOtkHr8FN0N6W38UufiHxlf78nfwapxx/QDEMgIApvZ3ac77Es2FxhPTVqZsfRX9SE12Sk8D/l5jUlZDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T03:37:14.600832Z","bundle_sha256":"82cd632f13fa8240af622527b763f6be36e2aea653929b2a8370cc8f72a51b06"}}