{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:6MT5WPNGUZQDLQCNR2U4NMC6TB","short_pith_number":"pith:6MT5WPNG","canonical_record":{"source":{"id":"1903.09537","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-22T14:48:40Z","cross_cats_sorted":[],"title_canon_sha256":"d0455e33b7b89d6a4096dd3bf509cf82572f54e56f7c076f11062040968f9eb9","abstract_canon_sha256":"c3cedc6327142d7aeba75b66e8c11e8eeb52d7a82feb8a82a88e01cb2c6b129d"},"schema_version":"1.0"},"canonical_sha256":"f327db3da6a66035c04d8ea9c6b05e98460bed4cafdc7c5a8766cd1f792a346d","source":{"kind":"arxiv","id":"1903.09537","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.09537","created_at":"2026-05-17T23:50:39Z"},{"alias_kind":"arxiv_version","alias_value":"1903.09537v1","created_at":"2026-05-17T23:50:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.09537","created_at":"2026-05-17T23:50:39Z"},{"alias_kind":"pith_short_12","alias_value":"6MT5WPNGUZQD","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6MT5WPNGUZQDLQCN","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6MT5WPNG","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:6MT5WPNGUZQDLQCNR2U4NMC6TB","target":"record","payload":{"canonical_record":{"source":{"id":"1903.09537","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-22T14:48:40Z","cross_cats_sorted":[],"title_canon_sha256":"d0455e33b7b89d6a4096dd3bf509cf82572f54e56f7c076f11062040968f9eb9","abstract_canon_sha256":"c3cedc6327142d7aeba75b66e8c11e8eeb52d7a82feb8a82a88e01cb2c6b129d"},"schema_version":"1.0"},"canonical_sha256":"f327db3da6a66035c04d8ea9c6b05e98460bed4cafdc7c5a8766cd1f792a346d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:39.342102Z","signature_b64":"2q3V6JYfrSlRhL6rKNqzFaLv2LU3d6fL7UTg+zoUd4D4AD4k5zUOqrTTbI1VDqWxCuf6prOibscAosbOYP6EAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f327db3da6a66035c04d8ea9c6b05e98460bed4cafdc7c5a8766cd1f792a346d","last_reissued_at":"2026-05-17T23:50:39.341269Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:39.341269Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.09537","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EgbNOS0yJOsSxY3H0A5LKIUVKY/UIcLwVaISWxhr/RZNny9zAvUBR06ykw6VqtYUWNy2uOaDwHt41OLj5Tk7AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T14:57:48.949888Z"},"content_sha256":"a1f5340a022bf722be92fee4f51d2171c088e1958a615dcf9fdf317d8c486892","schema_version":"1.0","event_id":"sha256:a1f5340a022bf722be92fee4f51d2171c088e1958a615dcf9fdf317d8c486892"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:6MT5WPNGUZQDLQCNR2U4NMC6TB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Iterative Reinforcement Learning Based Design of Dynamic Locomotion Skills for Cassie","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Jeremy Dao, Jonathan Hurst, Michiel van de Panne, Patrick Clary, Pedro Morais, Zhaoming Xie","submitted_at":"2019-03-22T14:48:40Z","abstract_excerpt":"Deep reinforcement learning (DRL) is a promising approach for developing legged locomotion skills. However, the iterative design process that is inevitable in practice is poorly supported by the default methodology. It is difficult to predict the outcomes of changes made to the reward functions, policy architectures, and the set of tasks being trained on. In this paper, we propose a practical method that allows the reward function to be fully redefined on each successive design iteration while limiting the deviation from the previous iteration. We characterize policies via sets of Deterministi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.09537","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0OoTAIhq/x03NoQQB95LaCcrG8ssfg9m/BzBtjUg+DYgT5VJVVMFIQlHM55mvoUqDDy5YAfFzQcP0eaEusSWCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T14:57:48.950568Z"},"content_sha256":"84ee88a252c302a87ead00cdce35e835bbbeb575affaac59c6cbcb11f74133fe","schema_version":"1.0","event_id":"sha256:84ee88a252c302a87ead00cdce35e835bbbeb575affaac59c6cbcb11f74133fe"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB/bundle.json","state_url":"https://pith.science/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T14:57:48Z","links":{"resolver":"https://pith.science/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB","bundle":"https://pith.science/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB/bundle.json","state":"https://pith.science/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6MT5WPNGUZQDLQCNR2U4NMC6TB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:6MT5WPNGUZQDLQCNR2U4NMC6TB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c3cedc6327142d7aeba75b66e8c11e8eeb52d7a82feb8a82a88e01cb2c6b129d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-22T14:48:40Z","title_canon_sha256":"d0455e33b7b89d6a4096dd3bf509cf82572f54e56f7c076f11062040968f9eb9"},"schema_version":"1.0","source":{"id":"1903.09537","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.09537","created_at":"2026-05-17T23:50:39Z"},{"alias_kind":"arxiv_version","alias_value":"1903.09537v1","created_at":"2026-05-17T23:50:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.09537","created_at":"2026-05-17T23:50:39Z"},{"alias_kind":"pith_short_12","alias_value":"6MT5WPNGUZQD","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6MT5WPNGUZQDLQCN","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6MT5WPNG","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:84ee88a252c302a87ead00cdce35e835bbbeb575affaac59c6cbcb11f74133fe","target":"graph","created_at":"2026-05-17T23:50:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep reinforcement learning (DRL) is a promising approach for developing legged locomotion skills. However, the iterative design process that is inevitable in practice is poorly supported by the default methodology. It is difficult to predict the outcomes of changes made to the reward functions, policy architectures, and the set of tasks being trained on. In this paper, we propose a practical method that allows the reward function to be fully redefined on each successive design iteration while limiting the deviation from the previous iteration. We characterize policies via sets of Deterministi","authors_text":"Jeremy Dao, Jonathan Hurst, Michiel van de Panne, Patrick Clary, Pedro Morais, Zhaoming Xie","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-22T14:48:40Z","title":"Iterative Reinforcement Learning Based Design of Dynamic Locomotion Skills for Cassie"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.09537","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a1f5340a022bf722be92fee4f51d2171c088e1958a615dcf9fdf317d8c486892","target":"record","created_at":"2026-05-17T23:50:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c3cedc6327142d7aeba75b66e8c11e8eeb52d7a82feb8a82a88e01cb2c6b129d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-22T14:48:40Z","title_canon_sha256":"d0455e33b7b89d6a4096dd3bf509cf82572f54e56f7c076f11062040968f9eb9"},"schema_version":"1.0","source":{"id":"1903.09537","kind":"arxiv","version":1}},"canonical_sha256":"f327db3da6a66035c04d8ea9c6b05e98460bed4cafdc7c5a8766cd1f792a346d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f327db3da6a66035c04d8ea9c6b05e98460bed4cafdc7c5a8766cd1f792a346d","first_computed_at":"2026-05-17T23:50:39.341269Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:39.341269Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2q3V6JYfrSlRhL6rKNqzFaLv2LU3d6fL7UTg+zoUd4D4AD4k5zUOqrTTbI1VDqWxCuf6prOibscAosbOYP6EAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:39.342102Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.09537","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a1f5340a022bf722be92fee4f51d2171c088e1958a615dcf9fdf317d8c486892","sha256:84ee88a252c302a87ead00cdce35e835bbbeb575affaac59c6cbcb11f74133fe"],"state_sha256":"6d3d3f37b54eb7cd82f992341e3c4d85f310bc118526dc3752bf6e703a481e8d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"plJDOxfFjepjb/KUzWeZDK2o+fp8TV0O7Y0+3SCo5HtthDigYOsGZfwQJJUPPdRDWpwEvkBeswHAXTF9dR35Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T14:57:48.954580Z","bundle_sha256":"df6763837333262a9957a0e6310646a804e132148ac465f386f7b2a9791c1125"}}