{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2013:2RFXWMUMZ3AFD3Y4DXEP7EQDIQ","short_pith_number":"pith:2RFXWMUM","schema_version":"1.0","canonical_sha256":"d44b7b328ccec051ef1c1dc8ff920344177f215107b45190c452d12f619df9d4","source":{"kind":"arxiv","id":"1301.3878","version":1},"attestation_state":"computed","paper":{"title":"PEGASUS: A Policy Search Method for Large MDPs and POMDPs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Andrew Y. Ng, Michael I. Jordan","submitted_at":"2013-01-16T15:51:42Z","abstract_excerpt":"We propose a new approach to the problem of searching a space of policies for a Markov decision process (MDP) or a partially observable Markov decision process (POMDP), given a model.  Our approach is based on the following observation: Any (PO)MDP can be transformed into an \"equivalent\" POMDP in which all state transitions (given the current state and action) are deterministic.  This reduces the general problem of policy search to one in which we need only consider POMDPs with deterministic transitions.  We give a natural way of estimating the value of all policies in these transformed POMDPs"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1301.3878","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2013-01-16T15:51:42Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"9a7c9a9689f9773481e50548d8052fda090adc2e41c93a67ddab8cadb4698b4a","abstract_canon_sha256":"7b89fa6c2dfd0bf6ebba2c8307327f609dae4a2732e11e395fa9c89af64b06c7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:36:15.125804Z","signature_b64":"AqL9+Sk6pMjQEEm47oRmy7lEGXJlXMKic5mgqjkjUXOBmYDdwIcp5dh38UiUsXA+KlkwJ2/d2gBn0Jonido6Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d44b7b328ccec051ef1c1dc8ff920344177f215107b45190c452d12f619df9d4","last_reissued_at":"2026-05-18T03:36:15.125323Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:36:15.125323Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PEGASUS: A Policy Search Method for Large MDPs and POMDPs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Andrew Y. Ng, Michael I. Jordan","submitted_at":"2013-01-16T15:51:42Z","abstract_excerpt":"We propose a new approach to the problem of searching a space of policies for a Markov decision process (MDP) or a partially observable Markov decision process (POMDP), given a model.  Our approach is based on the following observation: Any (PO)MDP can be transformed into an \"equivalent\" POMDP in which all state transitions (given the current state and action) are deterministic.  This reduces the general problem of policy search to one in which we need only consider POMDPs with deterministic transitions.  We give a natural way of estimating the value of all policies in these transformed POMDPs"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1301.3878","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1301.3878","created_at":"2026-05-18T03:36:15.125398+00:00"},{"alias_kind":"arxiv_version","alias_value":"1301.3878v1","created_at":"2026-05-18T03:36:15.125398+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1301.3878","created_at":"2026-05-18T03:36:15.125398+00:00"},{"alias_kind":"pith_short_12","alias_value":"2RFXWMUMZ3AF","created_at":"2026-05-18T12:27:32.513160+00:00"},{"alias_kind":"pith_short_16","alias_value":"2RFXWMUMZ3AFD3Y4","created_at":"2026-05-18T12:27:32.513160+00:00"},{"alias_kind":"pith_short_8","alias_value":"2RFXWMUM","created_at":"2026-05-18T12:27:32.513160+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2506.23040","citing_title":"Treatment, evidence, imitation, and chat","ref_index":73,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02112","citing_title":"An adaptive variance estimator for relative sparsity","ref_index":127,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ","json":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ.json","graph_json":"https://pith.science/api/pith-number/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/graph.json","events_json":"https://pith.science/api/pith-number/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/events.json","paper":"https://pith.science/paper/2RFXWMUM"},"agent_actions":{"view_html":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ","download_json":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ.json","view_paper":"https://pith.science/paper/2RFXWMUM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1301.3878&json=true","fetch_graph":"https://pith.science/api/pith-number/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/graph.json","fetch_events":"https://pith.science/api/pith-number/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/action/storage_attestation","attest_author":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/action/author_attestation","sign_citation":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/action/citation_signature","submit_replication":"https://pith.science/pith/2RFXWMUMZ3AFD3Y4DXEP7EQDIQ/action/replication_record"}},"created_at":"2026-05-18T03:36:15.125398+00:00","updated_at":"2026-05-18T03:36:15.125398+00:00"}