{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:AEGEVRCFLNDAXNMASPGH5VHRLH","short_pith_number":"pith:AEGEVRCF","schema_version":"1.0","canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","source":{"kind":"arxiv","id":"1901.07860","version":1},"attestation_state":"computed","paper":{"title":"Trust Region Value Optimization using Kalman Filtering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Shie Mannor, Shirli Di-Castro Shashua","submitted_at":"2019-01-23T13:09:31Z","abstract_excerpt":"Policy evaluation is a key process in reinforcement learning. It assesses a given policy using estimation of the corresponding value function. When using a parameterized function to approximate the value, it is common to optimize the set of parameters by minimizing the sum of squared Bellman Temporal Differences errors. However, this approach ignores certain distributional properties of both the errors and value parameters. Taking these distributions into account in the optimization process can provide useful information on the amount of confidence in value estimation. In this work we propose "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.07860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-23T13:09:31Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"bf8a54921e0a0fec5bdd9ded700cf80ca38612c57111c905deeea3b377219b63","abstract_canon_sha256":"1261c06cc5ee551424e44c16fdf3e71b904902d632fd8b5ece469b139ec75db1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:39.589985Z","signature_b64":"qiaBMfhEu6JhyUbyAsN10CxuodhwEJFhYkq5XYiKnNoeXxJdRB8g/QwZegmXTU/JUS6T/mdiu26a0N+JF0iLAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"010c4ac4455b460bb58093cc7ed4f159f7572804775e4759c79292606e07c35f","last_reissued_at":"2026-05-17T23:55:39.589607Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:39.589607Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Trust Region Value Optimization using Kalman Filtering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Shie Mannor, Shirli Di-Castro Shashua","submitted_at":"2019-01-23T13:09:31Z","abstract_excerpt":"Policy evaluation is a key process in reinforcement learning. It assesses a given policy using estimation of the corresponding value function. When using a parameterized function to approximate the value, it is common to optimize the set of parameters by minimizing the sum of squared Bellman Temporal Differences errors. However, this approach ignores certain distributional properties of both the errors and value parameters. Taking these distributions into account in the optimization process can provide useful information on the amount of confidence in value estimation. In this work we propose "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.07860","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.07860","created_at":"2026-05-17T23:55:39.589674+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.07860v1","created_at":"2026-05-17T23:55:39.589674+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.07860","created_at":"2026-05-17T23:55:39.589674+00:00"},{"alias_kind":"pith_short_12","alias_value":"AEGEVRCFLNDA","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_16","alias_value":"AEGEVRCFLNDAXNMA","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_8","alias_value":"AEGEVRCF","created_at":"2026-05-18T12:33:12.712433+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2604.23056","citing_title":"K-Score: Kalman Filter as a Principled Alternative to Reward Normalization in Reinforcement Learning","ref_index":4,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH","json":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH.json","graph_json":"https://pith.science/api/pith-number/AEGEVRCFLNDAXNMASPGH5VHRLH/graph.json","events_json":"https://pith.science/api/pith-number/AEGEVRCFLNDAXNMASPGH5VHRLH/events.json","paper":"https://pith.science/paper/AEGEVRCF"},"agent_actions":{"view_html":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH","download_json":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH.json","view_paper":"https://pith.science/paper/AEGEVRCF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.07860&json=true","fetch_graph":"https://pith.science/api/pith-number/AEGEVRCFLNDAXNMASPGH5VHRLH/graph.json","fetch_events":"https://pith.science/api/pith-number/AEGEVRCFLNDAXNMASPGH5VHRLH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/action/storage_attestation","attest_author":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/action/author_attestation","sign_citation":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/action/citation_signature","submit_replication":"https://pith.science/pith/AEGEVRCFLNDAXNMASPGH5VHRLH/action/replication_record"}},"created_at":"2026-05-17T23:55:39.589674+00:00","updated_at":"2026-05-17T23:55:39.589674+00:00"}