{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:VOSKFK3NATTBLI7J3VSLUEINQX","short_pith_number":"pith:VOSKFK3N","schema_version":"1.0","canonical_sha256":"aba4a2ab6d04e615a3e9dd64ba110d85fe054457aa94932f1b20c7b738859be3","source":{"kind":"arxiv","id":"1609.03076","version":2},"attestation_state":"computed","paper":{"title":"Guided Policy Search with Delayed Sensor Measurements","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Connor Schenck, Dieter Fox","submitted_at":"2016-09-10T17:44:41Z","abstract_excerpt":"Guided policy search is a method for reinforcement learning that trains a general policy for accomplishing a given task by guiding the learning of the policy with multiple guiding distributions. Guided policy search relies on learning an underlying dynamical model of the environment and then, at each iteration of the algorithm, using that model to gradually improve the policy. This model, though, often makes the assumption that the environment dynamics are markovian, e.g., depend only on the current state and control signal. In this paper we apply guided policy search to a problem with non-mar"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1609.03076","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2016-09-10T17:44:41Z","cross_cats_sorted":[],"title_canon_sha256":"ac0c631bb4a340f36850197fc8d4ec56960f58692f8258933f21a958b33ad6bd","abstract_canon_sha256":"5abe652568c2761d1ea2868b2b6085853c1a91549b3149425cc2998bb9f92138"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:33:59.863812Z","signature_b64":"N4RRIF5NwwebltxzgRLRQr1HuNcZSCLZJM2E5pHMbGp/euHugYL+rOynDjbm0L5WF5KUHq+qchQ6n4lvdZD/Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aba4a2ab6d04e615a3e9dd64ba110d85fe054457aa94932f1b20c7b738859be3","last_reissued_at":"2026-05-18T00:33:59.863209Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:33:59.863209Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Guided Policy Search with Delayed Sensor Measurements","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Connor Schenck, Dieter Fox","submitted_at":"2016-09-10T17:44:41Z","abstract_excerpt":"Guided policy search is a method for reinforcement learning that trains a general policy for accomplishing a given task by guiding the learning of the policy with multiple guiding distributions. Guided policy search relies on learning an underlying dynamical model of the environment and then, at each iteration of the algorithm, using that model to gradually improve the policy. This model, though, often makes the assumption that the environment dynamics are markovian, e.g., depend only on the current state and control signal. In this paper we apply guided policy search to a problem with non-mar"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1609.03076","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1609.03076","created_at":"2026-05-18T00:33:59.863295+00:00"},{"alias_kind":"arxiv_version","alias_value":"1609.03076v2","created_at":"2026-05-18T00:33:59.863295+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1609.03076","created_at":"2026-05-18T00:33:59.863295+00:00"},{"alias_kind":"pith_short_12","alias_value":"VOSKFK3NATTB","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_16","alias_value":"VOSKFK3NATTBLI7J","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_8","alias_value":"VOSKFK3N","created_at":"2026-05-18T12:30:48.956258+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX","json":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX.json","graph_json":"https://pith.science/api/pith-number/VOSKFK3NATTBLI7J3VSLUEINQX/graph.json","events_json":"https://pith.science/api/pith-number/VOSKFK3NATTBLI7J3VSLUEINQX/events.json","paper":"https://pith.science/paper/VOSKFK3N"},"agent_actions":{"view_html":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX","download_json":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX.json","view_paper":"https://pith.science/paper/VOSKFK3N","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1609.03076&json=true","fetch_graph":"https://pith.science/api/pith-number/VOSKFK3NATTBLI7J3VSLUEINQX/graph.json","fetch_events":"https://pith.science/api/pith-number/VOSKFK3NATTBLI7J3VSLUEINQX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX/action/storage_attestation","attest_author":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX/action/author_attestation","sign_citation":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX/action/citation_signature","submit_replication":"https://pith.science/pith/VOSKFK3NATTBLI7J3VSLUEINQX/action/replication_record"}},"created_at":"2026-05-18T00:33:59.863295+00:00","updated_at":"2026-05-18T00:33:59.863295+00:00"}