{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:KP43KVI64BE5FOUAGQHXGTK6XT","short_pith_number":"pith:KP43KVI6","canonical_record":{"source":{"id":"1812.04181","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2018-12-11T01:56:39Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ac2dcc0e5180a3e1f642fae0ce4944242af2bb61c84db133086a60ceced28118","abstract_canon_sha256":"8ff89826fc56df8e31210f4cb717fbb0a516c154f6866dbc618da0970b6570fb"},"schema_version":"1.0"},"canonical_sha256":"53f9b5551ee049d2ba80340f734d5ebcdf015a10b48a50d1f5945a32f435cc6b","source":{"kind":"arxiv","id":"1812.04181","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.04181","created_at":"2026-05-17T23:58:35Z"},{"alias_kind":"arxiv_version","alias_value":"1812.04181v1","created_at":"2026-05-17T23:58:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.04181","created_at":"2026-05-17T23:58:35Z"},{"alias_kind":"pith_short_12","alias_value":"KP43KVI64BE5","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_16","alias_value":"KP43KVI64BE5FOUA","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_8","alias_value":"KP43KVI6","created_at":"2026-05-18T12:32:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:KP43KVI64BE5FOUAGQHXGTK6XT","target":"record","payload":{"canonical_record":{"source":{"id":"1812.04181","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2018-12-11T01:56:39Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ac2dcc0e5180a3e1f642fae0ce4944242af2bb61c84db133086a60ceced28118","abstract_canon_sha256":"8ff89826fc56df8e31210f4cb717fbb0a516c154f6866dbc618da0970b6570fb"},"schema_version":"1.0"},"canonical_sha256":"53f9b5551ee049d2ba80340f734d5ebcdf015a10b48a50d1f5945a32f435cc6b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:58:35.312279Z","signature_b64":"+sg/2LzkMpWLMG8r4l71nvgJv4kq97hnpNcWvAdM4dHpGuRF+XTNKWW2tdF5EXIRiZOV7OYfJ7RR2D/DMsMsBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"53f9b5551ee049d2ba80340f734d5ebcdf015a10b48a50d1f5945a32f435cc6b","last_reissued_at":"2026-05-17T23:58:35.311566Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:58:35.311566Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.04181","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:58:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZJW9BeaelxPsXGhPWhyjwLNTc98fz3bE4A6s5JyhYyxRQq818pF5v9G3vj1blR5+eOb++8k6kZJ9MzxFa3wADA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-26T20:02:43.681248Z"},"content_sha256":"a4bd667ab958906f9fac007266e6e4ccef67b14ca40d6c7bca3ac6b66ced521c","schema_version":"1.0","event_id":"sha256:a4bd667ab958906f9fac007266e6e4ccef67b14ca40d6c7bca3ac6b66ced521c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:KP43KVI64BE5FOUAGQHXGTK6XT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"KF-LAX: Kronecker-factored curvature estimation for control variate optimization in reinforcement learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Mohammad Firouzi","submitted_at":"2018-12-11T01:56:39Z","abstract_excerpt":"A key challenge for gradient based optimization methods in model-free reinforcement learning is to develop an approach that is sample efficient and has low variance. In this work, we apply Kronecker-factored curvature estimation technique (KFAC) to a recently proposed gradient estimator for control variate optimization, RELAX, to increase the sample efficiency of using this gradient estimation method in reinforcement learning. The performance of the proposed method is demonstrated on a synthetic problem and a set of three discrete control task Atari games."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.04181","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:58:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RxNt/KiJ5GC5MrxhA+6iL+HDgTHIBMG9iACVcbNXDTav/MNXSv+kZ6HvETEFmPvwK0O7HmkkhRnVPG4KIjNWDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-26T20:02:43.681603Z"},"content_sha256":"95e57622187438b35f7ef09847c894f81ee7376f660197174b3f20757ff787a3","schema_version":"1.0","event_id":"sha256:95e57622187438b35f7ef09847c894f81ee7376f660197174b3f20757ff787a3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KP43KVI64BE5FOUAGQHXGTK6XT/bundle.json","state_url":"https://pith.science/pith/KP43KVI64BE5FOUAGQHXGTK6XT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KP43KVI64BE5FOUAGQHXGTK6XT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-26T20:02:43Z","links":{"resolver":"https://pith.science/pith/KP43KVI64BE5FOUAGQHXGTK6XT","bundle":"https://pith.science/pith/KP43KVI64BE5FOUAGQHXGTK6XT/bundle.json","state":"https://pith.science/pith/KP43KVI64BE5FOUAGQHXGTK6XT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KP43KVI64BE5FOUAGQHXGTK6XT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:KP43KVI64BE5FOUAGQHXGTK6XT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8ff89826fc56df8e31210f4cb717fbb0a516c154f6866dbc618da0970b6570fb","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2018-12-11T01:56:39Z","title_canon_sha256":"ac2dcc0e5180a3e1f642fae0ce4944242af2bb61c84db133086a60ceced28118"},"schema_version":"1.0","source":{"id":"1812.04181","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.04181","created_at":"2026-05-17T23:58:35Z"},{"alias_kind":"arxiv_version","alias_value":"1812.04181v1","created_at":"2026-05-17T23:58:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.04181","created_at":"2026-05-17T23:58:35Z"},{"alias_kind":"pith_short_12","alias_value":"KP43KVI64BE5","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_16","alias_value":"KP43KVI64BE5FOUA","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_8","alias_value":"KP43KVI6","created_at":"2026-05-18T12:32:33Z"}],"graph_snapshots":[{"event_id":"sha256:95e57622187438b35f7ef09847c894f81ee7376f660197174b3f20757ff787a3","target":"graph","created_at":"2026-05-17T23:58:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"A key challenge for gradient based optimization methods in model-free reinforcement learning is to develop an approach that is sample efficient and has low variance. In this work, we apply Kronecker-factored curvature estimation technique (KFAC) to a recently proposed gradient estimator for control variate optimization, RELAX, to increase the sample efficiency of using this gradient estimation method in reinforcement learning. The performance of the proposed method is demonstrated on a synthetic problem and a set of three discrete control task Atari games.","authors_text":"Mohammad Firouzi","cross_cats":["stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2018-12-11T01:56:39Z","title":"KF-LAX: Kronecker-factored curvature estimation for control variate optimization in reinforcement learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.04181","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a4bd667ab958906f9fac007266e6e4ccef67b14ca40d6c7bca3ac6b66ced521c","target":"record","created_at":"2026-05-17T23:58:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8ff89826fc56df8e31210f4cb717fbb0a516c154f6866dbc618da0970b6570fb","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2018-12-11T01:56:39Z","title_canon_sha256":"ac2dcc0e5180a3e1f642fae0ce4944242af2bb61c84db133086a60ceced28118"},"schema_version":"1.0","source":{"id":"1812.04181","kind":"arxiv","version":1}},"canonical_sha256":"53f9b5551ee049d2ba80340f734d5ebcdf015a10b48a50d1f5945a32f435cc6b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"53f9b5551ee049d2ba80340f734d5ebcdf015a10b48a50d1f5945a32f435cc6b","first_computed_at":"2026-05-17T23:58:35.311566Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:58:35.311566Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+sg/2LzkMpWLMG8r4l71nvgJv4kq97hnpNcWvAdM4dHpGuRF+XTNKWW2tdF5EXIRiZOV7OYfJ7RR2D/DMsMsBg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:58:35.312279Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.04181","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a4bd667ab958906f9fac007266e6e4ccef67b14ca40d6c7bca3ac6b66ced521c","sha256:95e57622187438b35f7ef09847c894f81ee7376f660197174b3f20757ff787a3"],"state_sha256":"03029b49a171f4a4445db732aa149b9a38831a71f38ddaace662a5c1b0945515"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MsPQKzS24FPhJQQfO5nYks6o9ofEymDwEBD7R5KRex/leURNwUV8JC8PH6EmYAFRdnUOZohGFOicUnp+HvQXCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-26T20:02:43.683491Z","bundle_sha256":"c5effdef5ea414b58b5680d68c65a04d5f10f6fdc28815eb424b768aadba5a5f"}}