{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:F5KS5ES7USXNTHBYYY7D3UNMCG","short_pith_number":"pith:F5KS5ES7","canonical_record":{"source":{"id":"1603.00622","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T08:50:57Z","cross_cats_sorted":[],"title_canon_sha256":"929d1c7d3719837b83f7269663e7be4e659d2de166465ddb0ac5db6e387b2185","abstract_canon_sha256":"c8471b3436c11c4392c1512daf5588007dfc64bdec9798fd585b4747cb01906c"},"schema_version":"1.0"},"canonical_sha256":"2f552e925fa4aed99c38c63e3dd1ac11b0d0c51d30bc53c9143194ebf8d4d2a9","source":{"kind":"arxiv","id":"1603.00622","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00622","created_at":"2026-05-18T00:50:01Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00622v4","created_at":"2026-05-18T00:50:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00622","created_at":"2026-05-18T00:50:01Z"},{"alias_kind":"pith_short_12","alias_value":"F5KS5ES7USXN","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_16","alias_value":"F5KS5ES7USXNTHBY","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_8","alias_value":"F5KS5ES7","created_at":"2026-05-18T12:30:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:F5KS5ES7USXNTHBYYY7D3UNMCG","target":"record","payload":{"canonical_record":{"source":{"id":"1603.00622","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T08:50:57Z","cross_cats_sorted":[],"title_canon_sha256":"929d1c7d3719837b83f7269663e7be4e659d2de166465ddb0ac5db6e387b2185","abstract_canon_sha256":"c8471b3436c11c4392c1512daf5588007dfc64bdec9798fd585b4747cb01906c"},"schema_version":"1.0"},"canonical_sha256":"2f552e925fa4aed99c38c63e3dd1ac11b0d0c51d30bc53c9143194ebf8d4d2a9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:50:01.553194Z","signature_b64":"Jia9bCaGept/PKoTFduUjYnouXdv4XXcWTgeAw7hSBw2N1IriI+ZM6PDFIK8Ajh2CHLmZypZeJdfNtS3+nXIBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2f552e925fa4aed99c38c63e3dd1ac11b0d0c51d30bc53c9143194ebf8d4d2a9","last_reissued_at":"2026-05-18T00:50:01.552611Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:50:01.552611Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1603.00622","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:50:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W/ZnW9AsGNE/UklmjOUQJj611O7wvRsadiSJ3kjESZpnQwZdLp2qRxVbkymT+5Uvt4TbwQblwKBprW3JgBiRBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T05:13:14.022085Z"},"content_sha256":"f1d67c264d6666083b87784f791f89e5fbed259d635b08f9b51c6f589861aac3","schema_version":"1.0","event_id":"sha256:f1d67c264d6666083b87784f791f89e5fbed259d635b08f9b51c6f589861aac3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:F5KS5ES7USXNTHBYYY7D3UNMCG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"PLATO: Policy Learning using Adaptive Trajectory Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Gregory Kahn, Pieter Abbeel, Sergey Levine, Tianhao Zhang","submitted_at":"2016-03-02T08:50:57Z","abstract_excerpt":"Policy search can in principle acquire complex strategies for control of robots and other autonomous systems. When the policy is trained to process raw sensory inputs, such as images and depth maps, it can also acquire a strategy that combines perception and control. However, effectively processing such complex inputs requires an expressive policy class, such as a large neural network. These high-dimensional policies are difficult to train, especially when learning to control safety-critical systems. We propose PLATO, an algorithm that trains complex control policies with supervised learning, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00622","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:50:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KOOVI/69UAHyBrYMmbB5mH982fJ3ai0lzS2FR6JChWNzFxoIUlmrncq9UuHshIAgAb1V7QKTOPMKK+JAvQCQDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T05:13:14.022734Z"},"content_sha256":"cb2dff5b799fb8f209b8bbaae3a017ccb5a04060fb5db1f97d3e2b44081aca67","schema_version":"1.0","event_id":"sha256:cb2dff5b799fb8f209b8bbaae3a017ccb5a04060fb5db1f97d3e2b44081aca67"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/F5KS5ES7USXNTHBYYY7D3UNMCG/bundle.json","state_url":"https://pith.science/pith/F5KS5ES7USXNTHBYYY7D3UNMCG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/F5KS5ES7USXNTHBYYY7D3UNMCG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T05:13:14Z","links":{"resolver":"https://pith.science/pith/F5KS5ES7USXNTHBYYY7D3UNMCG","bundle":"https://pith.science/pith/F5KS5ES7USXNTHBYYY7D3UNMCG/bundle.json","state":"https://pith.science/pith/F5KS5ES7USXNTHBYYY7D3UNMCG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/F5KS5ES7USXNTHBYYY7D3UNMCG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:F5KS5ES7USXNTHBYYY7D3UNMCG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c8471b3436c11c4392c1512daf5588007dfc64bdec9798fd585b4747cb01906c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T08:50:57Z","title_canon_sha256":"929d1c7d3719837b83f7269663e7be4e659d2de166465ddb0ac5db6e387b2185"},"schema_version":"1.0","source":{"id":"1603.00622","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00622","created_at":"2026-05-18T00:50:01Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00622v4","created_at":"2026-05-18T00:50:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00622","created_at":"2026-05-18T00:50:01Z"},{"alias_kind":"pith_short_12","alias_value":"F5KS5ES7USXN","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_16","alias_value":"F5KS5ES7USXNTHBY","created_at":"2026-05-18T12:30:15Z"},{"alias_kind":"pith_short_8","alias_value":"F5KS5ES7","created_at":"2026-05-18T12:30:15Z"}],"graph_snapshots":[{"event_id":"sha256:cb2dff5b799fb8f209b8bbaae3a017ccb5a04060fb5db1f97d3e2b44081aca67","target":"graph","created_at":"2026-05-18T00:50:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Policy search can in principle acquire complex strategies for control of robots and other autonomous systems. When the policy is trained to process raw sensory inputs, such as images and depth maps, it can also acquire a strategy that combines perception and control. However, effectively processing such complex inputs requires an expressive policy class, such as a large neural network. These high-dimensional policies are difficult to train, especially when learning to control safety-critical systems. We propose PLATO, an algorithm that trains complex control policies with supervised learning, ","authors_text":"Gregory Kahn, Pieter Abbeel, Sergey Levine, Tianhao Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T08:50:57Z","title":"PLATO: Policy Learning using Adaptive Trajectory Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00622","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f1d67c264d6666083b87784f791f89e5fbed259d635b08f9b51c6f589861aac3","target":"record","created_at":"2026-05-18T00:50:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c8471b3436c11c4392c1512daf5588007dfc64bdec9798fd585b4747cb01906c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T08:50:57Z","title_canon_sha256":"929d1c7d3719837b83f7269663e7be4e659d2de166465ddb0ac5db6e387b2185"},"schema_version":"1.0","source":{"id":"1603.00622","kind":"arxiv","version":4}},"canonical_sha256":"2f552e925fa4aed99c38c63e3dd1ac11b0d0c51d30bc53c9143194ebf8d4d2a9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2f552e925fa4aed99c38c63e3dd1ac11b0d0c51d30bc53c9143194ebf8d4d2a9","first_computed_at":"2026-05-18T00:50:01.552611Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:50:01.552611Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Jia9bCaGept/PKoTFduUjYnouXdv4XXcWTgeAw7hSBw2N1IriI+ZM6PDFIK8Ajh2CHLmZypZeJdfNtS3+nXIBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:50:01.553194Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.00622","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f1d67c264d6666083b87784f791f89e5fbed259d635b08f9b51c6f589861aac3","sha256:cb2dff5b799fb8f209b8bbaae3a017ccb5a04060fb5db1f97d3e2b44081aca67"],"state_sha256":"2cd021e56d49fdb0287b02ab7a0a9793d51f9bd0c4610b7b6e86eae52b5f901a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CPROmrS/6LzoduwcWdrj96diMtB+tGUlzFE9sxpj6L2XHEbJjkV38ZTM80l1s8h9+JY+wXNnytHacQINY+qeDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T05:13:14.026376Z","bundle_sha256":"8dec64b995cfbe2f958ff0efda26e2a5d5cd7ba30747cc17ac7673aefd7ab67f"}}