{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:X6AFUFR5K5NAZKRQFOXEB2NAPG","short_pith_number":"pith:X6AFUFR5","canonical_record":{"source":{"id":"1812.11103","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T10:07:13Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"d8261b8c7589615c4e548e7eca869219b1169efa6bba9a3948c2504a0dc2f909","abstract_canon_sha256":"7ba8b96594b0ee9a086a22b3deae5c71d5105b0d4373fed93ea7469b15e2f28e"},"schema_version":"1.0"},"canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","source":{"kind":"arxiv","id":"1812.11103","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.11103","created_at":"2026-05-17T23:42:58Z"},{"alias_kind":"arxiv_version","alias_value":"1812.11103v3","created_at":"2026-05-17T23:42:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.11103","created_at":"2026-05-17T23:42:58Z"},{"alias_kind":"pith_short_12","alias_value":"X6AFUFR5K5NA","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"X6AFUFR5K5NAZKRQ","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"X6AFUFR5","created_at":"2026-05-18T12:33:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:X6AFUFR5K5NAZKRQFOXEB2NAPG","target":"record","payload":{"canonical_record":{"source":{"id":"1812.11103","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T10:07:13Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"d8261b8c7589615c4e548e7eca869219b1169efa6bba9a3948c2504a0dc2f909","abstract_canon_sha256":"7ba8b96594b0ee9a086a22b3deae5c71d5105b0d4373fed93ea7469b15e2f28e"},"schema_version":"1.0"},"canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:58.533603Z","signature_b64":"5tiRf6329Y7GTMzB8uvun/smbGAm6WWjAma4W70wy+qTke41xdUaQi/M19s9lwrejzalBinJRsLglopnpSq8BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","last_reissued_at":"2026-05-17T23:42:58.532994Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:58.532994Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.11103","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:42:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Qz+E6YCnK0cQFBw/lzHyvBWlOxX0AWUQTWCEDlK0EyW65hzSgKm5CdHBWZAWJtBfS1g6CWJqEyQOGJC94CwtCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T16:31:19.643964Z"},"content_sha256":"2427e1a03c888dca00ef567035e73d2b7549277fdcaaf57bf1ffbcffabff3d6e","schema_version":"1.0","event_id":"sha256:2427e1a03c888dca00ef567035e73d2b7549277fdcaaf57bf1ffbcffabff3d6e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:X6AFUFR5K5NAZKRQFOXEB2NAPG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning to Walk via Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aurick Zhou, George Tucker, Jie Tan, Sehoon Ha, Sergey Levine, Tuomas Haarnoja","submitted_at":"2018-12-26T10:07:13Z","abstract_excerpt":"Deep reinforcement learning (deep RL) holds the promise of automating the acquisition of complex controllers that can map sensory inputs directly to low-level actions. In the domain of robotic locomotion, deep RL could enable learning locomotion skills with minimal engineering and without an explicit model of the robot dynamics. Unfortunately, applying deep RL to real-world robotic tasks is exceptionally difficult, primarily due to poor sample complexity and sensitivity to hyperparameters. While hyperparameters can be easily tuned in simulated domains, tuning may be prohibitively expensive on "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.11103","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:42:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"eyX/SY3jHJU4HAKhBVc0tys3M6A0NmnDV1Dw3w9FJE9zi/sUuBO+mFGlvQPQNX6/Gub5XogivM7fuvr3jptcCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T16:31:19.644597Z"},"content_sha256":"575012056f0b79ec30e48812474e1cdbc7deeb52432dba37398da965a06d6692","schema_version":"1.0","event_id":"sha256:575012056f0b79ec30e48812474e1cdbc7deeb52432dba37398da965a06d6692"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/bundle.json","state_url":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T16:31:19Z","links":{"resolver":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG","bundle":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/bundle.json","state":"https://pith.science/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/X6AFUFR5K5NAZKRQFOXEB2NAPG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:X6AFUFR5K5NAZKRQFOXEB2NAPG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7ba8b96594b0ee9a086a22b3deae5c71d5105b0d4373fed93ea7469b15e2f28e","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T10:07:13Z","title_canon_sha256":"d8261b8c7589615c4e548e7eca869219b1169efa6bba9a3948c2504a0dc2f909"},"schema_version":"1.0","source":{"id":"1812.11103","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.11103","created_at":"2026-05-17T23:42:58Z"},{"alias_kind":"arxiv_version","alias_value":"1812.11103v3","created_at":"2026-05-17T23:42:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.11103","created_at":"2026-05-17T23:42:58Z"},{"alias_kind":"pith_short_12","alias_value":"X6AFUFR5K5NA","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"X6AFUFR5K5NAZKRQ","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"X6AFUFR5","created_at":"2026-05-18T12:33:01Z"}],"graph_snapshots":[{"event_id":"sha256:575012056f0b79ec30e48812474e1cdbc7deeb52432dba37398da965a06d6692","target":"graph","created_at":"2026-05-17T23:42:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep reinforcement learning (deep RL) holds the promise of automating the acquisition of complex controllers that can map sensory inputs directly to low-level actions. In the domain of robotic locomotion, deep RL could enable learning locomotion skills with minimal engineering and without an explicit model of the robot dynamics. Unfortunately, applying deep RL to real-world robotic tasks is exceptionally difficult, primarily due to poor sample complexity and sensitivity to hyperparameters. While hyperparameters can be easily tuned in simulated domains, tuning may be prohibitively expensive on ","authors_text":"Aurick Zhou, George Tucker, Jie Tan, Sehoon Ha, Sergey Levine, Tuomas Haarnoja","cross_cats":["cs.AI","cs.RO","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T10:07:13Z","title":"Learning to Walk via Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.11103","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2427e1a03c888dca00ef567035e73d2b7549277fdcaaf57bf1ffbcffabff3d6e","target":"record","created_at":"2026-05-17T23:42:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7ba8b96594b0ee9a086a22b3deae5c71d5105b0d4373fed93ea7469b15e2f28e","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T10:07:13Z","title_canon_sha256":"d8261b8c7589615c4e548e7eca869219b1169efa6bba9a3948c2504a0dc2f909"},"schema_version":"1.0","source":{"id":"1812.11103","kind":"arxiv","version":3}},"canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bf805a163d575a0caa302bae40e9a0799b30e6b0957e1f2ac8ae4a310385b058","first_computed_at":"2026-05-17T23:42:58.532994Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:42:58.532994Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5tiRf6329Y7GTMzB8uvun/smbGAm6WWjAma4W70wy+qTke41xdUaQi/M19s9lwrejzalBinJRsLglopnpSq8BQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:42:58.533603Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.11103","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2427e1a03c888dca00ef567035e73d2b7549277fdcaaf57bf1ffbcffabff3d6e","sha256:575012056f0b79ec30e48812474e1cdbc7deeb52432dba37398da965a06d6692"],"state_sha256":"8b3250568af2876a79d616bd9254ff49d99113f61eaabc6e5bbd9b43ba5085a5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"irxOsdEt6fzY75vJZHQW0NDw9Ca0o7FAOJlH07EeuQF3qtvGbVEy8tTi1s3iK8s/WkRLc++DLrEyT1uUMqLRAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T16:31:19.647941Z","bundle_sha256":"ce179f0e865ca0a40ac69d016c34ceaab2e753dbded026da802068d863caa8e4"}}