{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:YC6MUKYXLEPEHMJ6AO26OLS3UF","short_pith_number":"pith:YC6MUKYX","canonical_record":{"source":{"id":"1706.06491","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SY","submitted_at":"2017-06-20T14:44:25Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"2946607aeae0a2a3f8b6d28570a317f7d1bc48dc8648da4f195f470927638f98","abstract_canon_sha256":"92fc2e8234544d96aadaca8c4c8565bf7828eb8c1b922846be69f1cfb3557b33"},"schema_version":"1.0"},"canonical_sha256":"c0bcca2b17591e43b13e03b5e72e5ba144a35d4b4ba0fe7688139ee9d79cb269","source":{"kind":"arxiv","id":"1706.06491","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1706.06491","created_at":"2026-05-18T00:22:46Z"},{"alias_kind":"arxiv_version","alias_value":"1706.06491v2","created_at":"2026-05-18T00:22:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.06491","created_at":"2026-05-18T00:22:46Z"},{"alias_kind":"pith_short_12","alias_value":"YC6MUKYXLEPE","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_16","alias_value":"YC6MUKYXLEPEHMJ6","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_8","alias_value":"YC6MUKYX","created_at":"2026-05-18T12:31:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:YC6MUKYXLEPEHMJ6AO26OLS3UF","target":"record","payload":{"canonical_record":{"source":{"id":"1706.06491","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SY","submitted_at":"2017-06-20T14:44:25Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"2946607aeae0a2a3f8b6d28570a317f7d1bc48dc8648da4f195f470927638f98","abstract_canon_sha256":"92fc2e8234544d96aadaca8c4c8565bf7828eb8c1b922846be69f1cfb3557b33"},"schema_version":"1.0"},"canonical_sha256":"c0bcca2b17591e43b13e03b5e72e5ba144a35d4b4ba0fe7688139ee9d79cb269","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:46.393988Z","signature_b64":"n4ce7acKJatMTjRJPTbUz4QebnO8lum5ZwjvFZb36md3biFvNQthnW2yl86HNKzr4SrHJXOc+FOitydySspCBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c0bcca2b17591e43b13e03b5e72e5ba144a35d4b4ba0fe7688139ee9d79cb269","last_reissued_at":"2026-05-18T00:22:46.393396Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:46.393396Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1706.06491","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:22:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o20u38MG8G0fYIIl0q7tkdehQ6lAp29ISuZpyaoWery9YFGUG9tB/aenpmSogdfTHLg8wjfoRTXNdn2ovu7aAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T20:48:01.389444Z"},"content_sha256":"6ae743b1e6789f3aa261a6a1acd84d84f75038eb2cddc0b0f224cc77b741e941","schema_version":"1.0","event_id":"sha256:6ae743b1e6789f3aa261a6a1acd84d84f75038eb2cddc0b0f224cc77b741e941"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:YC6MUKYXLEPEHMJ6AO26OLS3UF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Data-Efficient Reinforcement Learning with Probabilistic Model Predictive Control","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.SY","authors_text":"Marc Peter Deisenroth, Sanket Kamthe","submitted_at":"2017-06-20T14:44:25Z","abstract_excerpt":"Trial-and-error based reinforcement learning (RL) has seen rapid advancements in recent times, especially with the advent of deep neural networks. However, the majority of autonomous RL algorithms require a large number of interactions with the environment. A large number of interactions may be impractical in many real-world applications, such as robotics, and many practical systems have to obey limitations in the form of state space or control constraints. To reduce the number of system interactions while simultaneously handling constraints, we propose a model-based RL framework based on prob"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.06491","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:22:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QVKana3KvZwlsif835l+BeWLzVyv7J9htaleWEO0pfpc10BhHXrZutxE2FudCnzFVOlJe1KuV/X6JDJ8m+p2DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T20:48:01.390123Z"},"content_sha256":"ada6974bd047d5b489bad8224f296b59eb145061f59b7da668b61f2c325921ae","schema_version":"1.0","event_id":"sha256:ada6974bd047d5b489bad8224f296b59eb145061f59b7da668b61f2c325921ae"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF/bundle.json","state_url":"https://pith.science/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T20:48:01Z","links":{"resolver":"https://pith.science/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF","bundle":"https://pith.science/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF/bundle.json","state":"https://pith.science/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YC6MUKYXLEPEHMJ6AO26OLS3UF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:YC6MUKYXLEPEHMJ6AO26OLS3UF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"92fc2e8234544d96aadaca8c4c8565bf7828eb8c1b922846be69f1cfb3557b33","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SY","submitted_at":"2017-06-20T14:44:25Z","title_canon_sha256":"2946607aeae0a2a3f8b6d28570a317f7d1bc48dc8648da4f195f470927638f98"},"schema_version":"1.0","source":{"id":"1706.06491","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1706.06491","created_at":"2026-05-18T00:22:46Z"},{"alias_kind":"arxiv_version","alias_value":"1706.06491v2","created_at":"2026-05-18T00:22:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.06491","created_at":"2026-05-18T00:22:46Z"},{"alias_kind":"pith_short_12","alias_value":"YC6MUKYXLEPE","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_16","alias_value":"YC6MUKYXLEPEHMJ6","created_at":"2026-05-18T12:31:56Z"},{"alias_kind":"pith_short_8","alias_value":"YC6MUKYX","created_at":"2026-05-18T12:31:56Z"}],"graph_snapshots":[{"event_id":"sha256:ada6974bd047d5b489bad8224f296b59eb145061f59b7da668b61f2c325921ae","target":"graph","created_at":"2026-05-18T00:22:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Trial-and-error based reinforcement learning (RL) has seen rapid advancements in recent times, especially with the advent of deep neural networks. However, the majority of autonomous RL algorithms require a large number of interactions with the environment. A large number of interactions may be impractical in many real-world applications, such as robotics, and many practical systems have to obey limitations in the form of state space or control constraints. To reduce the number of system interactions while simultaneously handling constraints, we propose a model-based RL framework based on prob","authors_text":"Marc Peter Deisenroth, Sanket Kamthe","cross_cats":["stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SY","submitted_at":"2017-06-20T14:44:25Z","title":"Data-Efficient Reinforcement Learning with Probabilistic Model Predictive Control"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.06491","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6ae743b1e6789f3aa261a6a1acd84d84f75038eb2cddc0b0f224cc77b741e941","target":"record","created_at":"2026-05-18T00:22:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"92fc2e8234544d96aadaca8c4c8565bf7828eb8c1b922846be69f1cfb3557b33","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SY","submitted_at":"2017-06-20T14:44:25Z","title_canon_sha256":"2946607aeae0a2a3f8b6d28570a317f7d1bc48dc8648da4f195f470927638f98"},"schema_version":"1.0","source":{"id":"1706.06491","kind":"arxiv","version":2}},"canonical_sha256":"c0bcca2b17591e43b13e03b5e72e5ba144a35d4b4ba0fe7688139ee9d79cb269","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c0bcca2b17591e43b13e03b5e72e5ba144a35d4b4ba0fe7688139ee9d79cb269","first_computed_at":"2026-05-18T00:22:46.393396Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:22:46.393396Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"n4ce7acKJatMTjRJPTbUz4QebnO8lum5ZwjvFZb36md3biFvNQthnW2yl86HNKzr4SrHJXOc+FOitydySspCBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:22:46.393988Z","signed_message":"canonical_sha256_bytes"},"source_id":"1706.06491","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6ae743b1e6789f3aa261a6a1acd84d84f75038eb2cddc0b0f224cc77b741e941","sha256:ada6974bd047d5b489bad8224f296b59eb145061f59b7da668b61f2c325921ae"],"state_sha256":"b4fc756adad689e3f9a22832d8d5b085d00ac9b0c8c6334ede72f80f88e6c773"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jFxhAseBHqiS0ij2uqZLh5iS6yceT9BrolKqPk0iEAujfyMQewYk60XNYHec3PR78Z/RHRmnwEn6UJwMtdVHBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T20:48:01.393861Z","bundle_sha256":"e830c6074efe7a88791305645e05178214af515ae85d189fdcc53acd445336b9"}}