{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:F4P2DHAKX4LZDD77FXKPPPZTO5","short_pith_number":"pith:F4P2DHAK","canonical_record":{"source":{"id":"1809.10811","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2018-09-28T00:55:27Z","cross_cats_sorted":[],"title_canon_sha256":"8b9ba0b96e846f41112452e02b3589a580a72f2d7ebc47f6040a87d5310ce542","abstract_canon_sha256":"42181b269b860e331a7083c0465abb8a893123ae8350f76d8ee4467574ca8321"},"schema_version":"1.0"},"canonical_sha256":"2f1fa19c0abf17918fff2dd4f7bf3377688856409c6ac2c6691e2f1a7f095c03","source":{"kind":"arxiv","id":"1809.10811","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.10811","created_at":"2026-05-18T00:04:34Z"},{"alias_kind":"arxiv_version","alias_value":"1809.10811v1","created_at":"2026-05-18T00:04:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.10811","created_at":"2026-05-18T00:04:34Z"},{"alias_kind":"pith_short_12","alias_value":"F4P2DHAKX4LZ","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"F4P2DHAKX4LZDD77","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"F4P2DHAK","created_at":"2026-05-18T12:32:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:F4P2DHAKX4LZDD77FXKPPPZTO5","target":"record","payload":{"canonical_record":{"source":{"id":"1809.10811","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2018-09-28T00:55:27Z","cross_cats_sorted":[],"title_canon_sha256":"8b9ba0b96e846f41112452e02b3589a580a72f2d7ebc47f6040a87d5310ce542","abstract_canon_sha256":"42181b269b860e331a7083c0465abb8a893123ae8350f76d8ee4467574ca8321"},"schema_version":"1.0"},"canonical_sha256":"2f1fa19c0abf17918fff2dd4f7bf3377688856409c6ac2c6691e2f1a7f095c03","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:04:34.977111Z","signature_b64":"2WgT24Urt3fUeoZk3HVrTaT+IrUvjQNAA8G3Tt3obWS/x0YLeXmh5cOF2jial9fHEMjWt8kTp+nfafIV/07pBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2f1fa19c0abf17918fff2dd4f7bf3377688856409c6ac2c6691e2f1a7f095c03","last_reissued_at":"2026-05-18T00:04:34.976609Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:04:34.976609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.10811","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:04:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"igniixw68kWX69O2u95tdKlmfNyDbobCuBxBAMA1IRknSsxQg4+00jGVi8kYJ8/DWcHN7qwMbt26CPdAvKsOAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T20:37:20.606993Z"},"content_sha256":"8c61c79cba9c1a86757785288233500c66fe55c79d5fb856c27f6118b32d170c","schema_version":"1.0","event_id":"sha256:8c61c79cba9c1a86757785288233500c66fe55c79d5fb856c27f6118b32d170c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:F4P2DHAKX4LZDD77FXKPPPZTO5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Using Deep Reinforcement Learning to Learn High-Level Policies on the ATRIAS Biped","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Akshara Rai, Christopher G. Atkeson, Hartmut Geyer, Tianyu Li","submitted_at":"2018-09-28T00:55:27Z","abstract_excerpt":"Learning controllers for bipedal robots is a challenging problem, often requiring expert knowledge and extensive tuning of parameters that vary in different situations. Recently, deep reinforcement learning has shown promise at automatically learning controllers for complex systems in simulation. This has been followed by a push towards learning controllers that can be transferred between simulation and hardware, primarily with the use of domain randomization. However, domain randomization can make the problem of finding stable controllers even more challenging, especially for underactuated bi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.10811","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:04:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3SHx9TFWRbxRqTKS0Oqejl5ds/iIwmtlZa2JeCQYEu1zcpx+TAQfIKHirgn9ppElp8MN1zzl3m/VAY+QsOLpDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T20:37:20.607654Z"},"content_sha256":"3b67726094592952cff5009a00c1d6691e4b1efde5fe6df2aa2d4acb9de7cf71","schema_version":"1.0","event_id":"sha256:3b67726094592952cff5009a00c1d6691e4b1efde5fe6df2aa2d4acb9de7cf71"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/F4P2DHAKX4LZDD77FXKPPPZTO5/bundle.json","state_url":"https://pith.science/pith/F4P2DHAKX4LZDD77FXKPPPZTO5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/F4P2DHAKX4LZDD77FXKPPPZTO5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-18T20:37:20Z","links":{"resolver":"https://pith.science/pith/F4P2DHAKX4LZDD77FXKPPPZTO5","bundle":"https://pith.science/pith/F4P2DHAKX4LZDD77FXKPPPZTO5/bundle.json","state":"https://pith.science/pith/F4P2DHAKX4LZDD77FXKPPPZTO5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/F4P2DHAKX4LZDD77FXKPPPZTO5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:F4P2DHAKX4LZDD77FXKPPPZTO5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"42181b269b860e331a7083c0465abb8a893123ae8350f76d8ee4467574ca8321","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2018-09-28T00:55:27Z","title_canon_sha256":"8b9ba0b96e846f41112452e02b3589a580a72f2d7ebc47f6040a87d5310ce542"},"schema_version":"1.0","source":{"id":"1809.10811","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.10811","created_at":"2026-05-18T00:04:34Z"},{"alias_kind":"arxiv_version","alias_value":"1809.10811v1","created_at":"2026-05-18T00:04:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.10811","created_at":"2026-05-18T00:04:34Z"},{"alias_kind":"pith_short_12","alias_value":"F4P2DHAKX4LZ","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"F4P2DHAKX4LZDD77","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"F4P2DHAK","created_at":"2026-05-18T12:32:22Z"}],"graph_snapshots":[{"event_id":"sha256:3b67726094592952cff5009a00c1d6691e4b1efde5fe6df2aa2d4acb9de7cf71","target":"graph","created_at":"2026-05-18T00:04:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Learning controllers for bipedal robots is a challenging problem, often requiring expert knowledge and extensive tuning of parameters that vary in different situations. Recently, deep reinforcement learning has shown promise at automatically learning controllers for complex systems in simulation. This has been followed by a push towards learning controllers that can be transferred between simulation and hardware, primarily with the use of domain randomization. However, domain randomization can make the problem of finding stable controllers even more challenging, especially for underactuated bi","authors_text":"Akshara Rai, Christopher G. Atkeson, Hartmut Geyer, Tianyu Li","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2018-09-28T00:55:27Z","title":"Using Deep Reinforcement Learning to Learn High-Level Policies on the ATRIAS Biped"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.10811","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8c61c79cba9c1a86757785288233500c66fe55c79d5fb856c27f6118b32d170c","target":"record","created_at":"2026-05-18T00:04:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"42181b269b860e331a7083c0465abb8a893123ae8350f76d8ee4467574ca8321","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2018-09-28T00:55:27Z","title_canon_sha256":"8b9ba0b96e846f41112452e02b3589a580a72f2d7ebc47f6040a87d5310ce542"},"schema_version":"1.0","source":{"id":"1809.10811","kind":"arxiv","version":1}},"canonical_sha256":"2f1fa19c0abf17918fff2dd4f7bf3377688856409c6ac2c6691e2f1a7f095c03","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2f1fa19c0abf17918fff2dd4f7bf3377688856409c6ac2c6691e2f1a7f095c03","first_computed_at":"2026-05-18T00:04:34.976609Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:04:34.976609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2WgT24Urt3fUeoZk3HVrTaT+IrUvjQNAA8G3Tt3obWS/x0YLeXmh5cOF2jial9fHEMjWt8kTp+nfafIV/07pBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:04:34.977111Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.10811","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8c61c79cba9c1a86757785288233500c66fe55c79d5fb856c27f6118b32d170c","sha256:3b67726094592952cff5009a00c1d6691e4b1efde5fe6df2aa2d4acb9de7cf71"],"state_sha256":"bf821ce65ca73fba84f33fe83f297cf709194c9827a2f529a2be80bf6c00c01b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v5FoLlCy58FdH96oyXc2VEi41GxrPphowGT9FCMD20Jh8JZUatnKUusG/njKlQ/cVpZSpBp2s3JFldELyHbuCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-18T20:37:20.609740Z","bundle_sha256":"8b94362aa643065753b3484570b6253cb50a29b8ac9be808c2981684b3bed4a6"}}