{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:QJTDW4SBPABVS4LLFNOERWN3XK","short_pith_number":"pith:QJTDW4SB","canonical_record":{"source":{"id":"1207.1421","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-07-04T16:28:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d3523110af4f8af7d1ac49348956a0e427eef266753481a7434b467dfe64aab2","abstract_canon_sha256":"ef40ee4fe855f50323c6804dcc9c2e8fb385cbd021125f9fee62da65b8c43680"},"schema_version":"1.0"},"canonical_sha256":"82663b7241780359716b2b5c48d9bbbabadf4b37ff40e94cd3341f4b00fb387b","source":{"kind":"arxiv","id":"1207.1421","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1207.1421","created_at":"2026-05-18T03:51:37Z"},{"alias_kind":"arxiv_version","alias_value":"1207.1421v1","created_at":"2026-05-18T03:51:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1207.1421","created_at":"2026-05-18T03:51:37Z"},{"alias_kind":"pith_short_12","alias_value":"QJTDW4SBPABV","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_16","alias_value":"QJTDW4SBPABVS4LL","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_8","alias_value":"QJTDW4SB","created_at":"2026-05-18T12:27:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:QJTDW4SBPABVS4LLFNOERWN3XK","target":"record","payload":{"canonical_record":{"source":{"id":"1207.1421","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-07-04T16:28:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d3523110af4f8af7d1ac49348956a0e427eef266753481a7434b467dfe64aab2","abstract_canon_sha256":"ef40ee4fe855f50323c6804dcc9c2e8fb385cbd021125f9fee62da65b8c43680"},"schema_version":"1.0"},"canonical_sha256":"82663b7241780359716b2b5c48d9bbbabadf4b37ff40e94cd3341f4b00fb387b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:51:37.141134Z","signature_b64":"e+Gwz93NRQeOkJu2w4o7M6Q2ZvaywvYRfowAcdvz2u0rvzFf4wa9SsFBve0zrNVuMyYcdVwH9HZ5ikCDaUIvDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"82663b7241780359716b2b5c48d9bbbabadf4b37ff40e94cd3341f4b00fb387b","last_reissued_at":"2026-05-18T03:51:37.140664Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:51:37.140664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1207.1421","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:51:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QyPGL8ExOjM6vtLEl51E4U6c8nleCV4ujXcUqPfXI27MDj+tLDLbQXtHEjtx/MRgzcQS8i0vsHZRKEXG9QX7Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T05:05:54.699015Z"},"content_sha256":"4f8534dde19de1e958ae1c20be45edcc1c53697c01a2faf42d990f12cdb78984","schema_version":"1.0","event_id":"sha256:4f8534dde19de1e958ae1c20be45edcc1c53697c01a2faf42d990f12cdb78984"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:QJTDW4SBPABVS4LLFNOERWN3XK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Function Approximation Approach to Estimation of Policy Gradient for POMDP with Structured Policies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Huizhen Yu","submitted_at":"2012-07-04T16:28:10Z","abstract_excerpt":"We consider the estimation of the policy gradient in partially observable Markov decision processes (POMDP) with a special class of structured policies that are finite-state controllers. We show that the gradient estimation can be done in the Actor-Critic framework, by making the critic compute a \"value\" function that does not depend on the states of POMDP. This function is the conditional mean of the true value function that depends on the states. We show that the critic can be implemented using temporal difference (TD) methods with linear function approximations, and the analytical results o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1207.1421","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:51:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Vu0/NUKmX6d9YS6XstMEAYTGUibf97vohkHbaKFvPADVd7QpBK3gnTyH+lQth875sq6HpbMJpojwsNkzV1eIAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T05:05:54.699383Z"},"content_sha256":"2e3070cd5079496abf90fcce389535920d3aa2502f3e6b17f5bd73cd17217e90","schema_version":"1.0","event_id":"sha256:2e3070cd5079496abf90fcce389535920d3aa2502f3e6b17f5bd73cd17217e90"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QJTDW4SBPABVS4LLFNOERWN3XK/bundle.json","state_url":"https://pith.science/pith/QJTDW4SBPABVS4LLFNOERWN3XK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QJTDW4SBPABVS4LLFNOERWN3XK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T05:05:54Z","links":{"resolver":"https://pith.science/pith/QJTDW4SBPABVS4LLFNOERWN3XK","bundle":"https://pith.science/pith/QJTDW4SBPABVS4LLFNOERWN3XK/bundle.json","state":"https://pith.science/pith/QJTDW4SBPABVS4LLFNOERWN3XK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QJTDW4SBPABVS4LLFNOERWN3XK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:QJTDW4SBPABVS4LLFNOERWN3XK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ef40ee4fe855f50323c6804dcc9c2e8fb385cbd021125f9fee62da65b8c43680","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-07-04T16:28:10Z","title_canon_sha256":"d3523110af4f8af7d1ac49348956a0e427eef266753481a7434b467dfe64aab2"},"schema_version":"1.0","source":{"id":"1207.1421","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1207.1421","created_at":"2026-05-18T03:51:37Z"},{"alias_kind":"arxiv_version","alias_value":"1207.1421v1","created_at":"2026-05-18T03:51:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1207.1421","created_at":"2026-05-18T03:51:37Z"},{"alias_kind":"pith_short_12","alias_value":"QJTDW4SBPABV","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_16","alias_value":"QJTDW4SBPABVS4LL","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_8","alias_value":"QJTDW4SB","created_at":"2026-05-18T12:27:18Z"}],"graph_snapshots":[{"event_id":"sha256:2e3070cd5079496abf90fcce389535920d3aa2502f3e6b17f5bd73cd17217e90","target":"graph","created_at":"2026-05-18T03:51:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the estimation of the policy gradient in partially observable Markov decision processes (POMDP) with a special class of structured policies that are finite-state controllers. We show that the gradient estimation can be done in the Actor-Critic framework, by making the critic compute a \"value\" function that does not depend on the states of POMDP. This function is the conditional mean of the true value function that depends on the states. We show that the critic can be implemented using temporal difference (TD) methods with linear function approximations, and the analytical results o","authors_text":"Huizhen Yu","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-07-04T16:28:10Z","title":"A Function Approximation Approach to Estimation of Policy Gradient for POMDP with Structured Policies"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1207.1421","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4f8534dde19de1e958ae1c20be45edcc1c53697c01a2faf42d990f12cdb78984","target":"record","created_at":"2026-05-18T03:51:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ef40ee4fe855f50323c6804dcc9c2e8fb385cbd021125f9fee62da65b8c43680","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-07-04T16:28:10Z","title_canon_sha256":"d3523110af4f8af7d1ac49348956a0e427eef266753481a7434b467dfe64aab2"},"schema_version":"1.0","source":{"id":"1207.1421","kind":"arxiv","version":1}},"canonical_sha256":"82663b7241780359716b2b5c48d9bbbabadf4b37ff40e94cd3341f4b00fb387b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82663b7241780359716b2b5c48d9bbbabadf4b37ff40e94cd3341f4b00fb387b","first_computed_at":"2026-05-18T03:51:37.140664Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:51:37.140664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"e+Gwz93NRQeOkJu2w4o7M6Q2ZvaywvYRfowAcdvz2u0rvzFf4wa9SsFBve0zrNVuMyYcdVwH9HZ5ikCDaUIvDg==","signature_status":"signed_v1","signed_at":"2026-05-18T03:51:37.141134Z","signed_message":"canonical_sha256_bytes"},"source_id":"1207.1421","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4f8534dde19de1e958ae1c20be45edcc1c53697c01a2faf42d990f12cdb78984","sha256:2e3070cd5079496abf90fcce389535920d3aa2502f3e6b17f5bd73cd17217e90"],"state_sha256":"c0a24c4c1a4b170ff83b5928588cd4db07d907de4e70776aad0ad34762854baa"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oTlJvfY3CDRrrrH8QAO754bjrR+LqGqcPsKf5elhOiO2gde3UJyfsG8aY9o16i8nm6lOewqkmPfLRrvwqt77Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T05:05:54.701823Z","bundle_sha256":"a73c21ef984fee6d0455b5ced8b01dea6bd458f2f1c15b1992c6d9f1e5ccd0b6"}}