{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:AGUXAQUTVZELLDTINOAEZRIJXS","short_pith_number":"pith:AGUXAQUT","canonical_record":{"source":{"id":"1707.08817","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-27T11:16:53Z","cross_cats_sorted":[],"title_canon_sha256":"31ac8aee2c5377055d09451b8f60bc35597766337c2f1bf9e706bd7b70fc0626","abstract_canon_sha256":"ea901a17e7aef1852fbb4497579d073b929292d3d7d71c718409ea28403baee4"},"schema_version":"1.0"},"canonical_sha256":"01a9704293ae48b58e686b804cc509bc8c58b7737730f3547e4f42766433994e","source":{"kind":"arxiv","id":"1707.08817","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.08817","created_at":"2026-05-18T00:03:56Z"},{"alias_kind":"arxiv_version","alias_value":"1707.08817v2","created_at":"2026-05-18T00:03:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.08817","created_at":"2026-05-18T00:03:56Z"},{"alias_kind":"pith_short_12","alias_value":"AGUXAQUTVZEL","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"AGUXAQUTVZELLDTI","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"AGUXAQUT","created_at":"2026-05-18T12:31:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:AGUXAQUTVZELLDTINOAEZRIJXS","target":"record","payload":{"canonical_record":{"source":{"id":"1707.08817","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-27T11:16:53Z","cross_cats_sorted":[],"title_canon_sha256":"31ac8aee2c5377055d09451b8f60bc35597766337c2f1bf9e706bd7b70fc0626","abstract_canon_sha256":"ea901a17e7aef1852fbb4497579d073b929292d3d7d71c718409ea28403baee4"},"schema_version":"1.0"},"canonical_sha256":"01a9704293ae48b58e686b804cc509bc8c58b7737730f3547e4f42766433994e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:03:56.930530Z","signature_b64":"NFTM+TiM2VeaqWZ23ynM7Z+maXOx3JfPaUgPFc6mgG6q3A42EDCyd9xAice1eIsXE22FTm45c9YjFVLP00JCBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"01a9704293ae48b58e686b804cc509bc8c58b7737730f3547e4f42766433994e","last_reissued_at":"2026-05-18T00:03:56.929806Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:03:56.929806Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.08817","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:03:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EH2GC6qBLFalNOLhHnaysRsm0v0uqxKzNOWjEPpPuc4qyH79vIQJnrEV6czVBVfOVkmHax4Pqy+H4fcwNDk8Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T22:29:14.012886Z"},"content_sha256":"2e0cf315781a18cbfa271c3a51f1864d0a201277f85c72a65e9728b648a8fb69","schema_version":"1.0","event_id":"sha256:2e0cf315781a18cbfa271c3a51f1864d0a201277f85c72a65e9728b648a8fb69"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:AGUXAQUTVZELLDTINOAEZRIJXS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Leveraging Demonstrations for Deep Reinforcement Learning on Robotics Problems with Sparse Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Bilal Piot, Fumin Wang, Jonathan Scholz, Martin Riedmiller, Mel Vecerik, Nicolas Heess, Olivier Pietquin, Thomas Lampe, Thomas Roth\\\"orl, Todd Hester","submitted_at":"2017-07-27T11:16:53Z","abstract_excerpt":"We propose a general and model-free approach for Reinforcement Learning (RL) on real robotics with sparse rewards. We build upon the Deep Deterministic Policy Gradient (DDPG) algorithm to use demonstrations. Both demonstrations and actual interactions are used to fill a replay buffer and the sampling ratio between demonstrations and transitions is automatically tuned via a prioritized replay mechanism. Typically, carefully engineered shaping rewards are required to enable the agents to efficiently explore on high dimensional control problems such as robotics. They are also required for model-b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.08817","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:03:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Wi9IhZbRLTzBT71OVEdIECLbwWt0v/WN99R/u5mlaHaFoSjhJmwXuLoqoPT5hqS9t+5WmTZrD6lRWv3JljneAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T22:29:14.013680Z"},"content_sha256":"72042d72e528a23dd0605c6590787d8b8ac523b787737f161f12854b179d12ba","schema_version":"1.0","event_id":"sha256:72042d72e528a23dd0605c6590787d8b8ac523b787737f161f12854b179d12ba"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AGUXAQUTVZELLDTINOAEZRIJXS/bundle.json","state_url":"https://pith.science/pith/AGUXAQUTVZELLDTINOAEZRIJXS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AGUXAQUTVZELLDTINOAEZRIJXS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T22:29:14Z","links":{"resolver":"https://pith.science/pith/AGUXAQUTVZELLDTINOAEZRIJXS","bundle":"https://pith.science/pith/AGUXAQUTVZELLDTINOAEZRIJXS/bundle.json","state":"https://pith.science/pith/AGUXAQUTVZELLDTINOAEZRIJXS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AGUXAQUTVZELLDTINOAEZRIJXS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:AGUXAQUTVZELLDTINOAEZRIJXS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ea901a17e7aef1852fbb4497579d073b929292d3d7d71c718409ea28403baee4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-27T11:16:53Z","title_canon_sha256":"31ac8aee2c5377055d09451b8f60bc35597766337c2f1bf9e706bd7b70fc0626"},"schema_version":"1.0","source":{"id":"1707.08817","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.08817","created_at":"2026-05-18T00:03:56Z"},{"alias_kind":"arxiv_version","alias_value":"1707.08817v2","created_at":"2026-05-18T00:03:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.08817","created_at":"2026-05-18T00:03:56Z"},{"alias_kind":"pith_short_12","alias_value":"AGUXAQUTVZEL","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"AGUXAQUTVZELLDTI","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"AGUXAQUT","created_at":"2026-05-18T12:31:05Z"}],"graph_snapshots":[{"event_id":"sha256:72042d72e528a23dd0605c6590787d8b8ac523b787737f161f12854b179d12ba","target":"graph","created_at":"2026-05-18T00:03:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose a general and model-free approach for Reinforcement Learning (RL) on real robotics with sparse rewards. We build upon the Deep Deterministic Policy Gradient (DDPG) algorithm to use demonstrations. Both demonstrations and actual interactions are used to fill a replay buffer and the sampling ratio between demonstrations and transitions is automatically tuned via a prioritized replay mechanism. Typically, carefully engineered shaping rewards are required to enable the agents to efficiently explore on high dimensional control problems such as robotics. They are also required for model-b","authors_text":"Bilal Piot, Fumin Wang, Jonathan Scholz, Martin Riedmiller, Mel Vecerik, Nicolas Heess, Olivier Pietquin, Thomas Lampe, Thomas Roth\\\"orl, Todd Hester","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-27T11:16:53Z","title":"Leveraging Demonstrations for Deep Reinforcement Learning on Robotics Problems with Sparse Rewards"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.08817","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2e0cf315781a18cbfa271c3a51f1864d0a201277f85c72a65e9728b648a8fb69","target":"record","created_at":"2026-05-18T00:03:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ea901a17e7aef1852fbb4497579d073b929292d3d7d71c718409ea28403baee4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-27T11:16:53Z","title_canon_sha256":"31ac8aee2c5377055d09451b8f60bc35597766337c2f1bf9e706bd7b70fc0626"},"schema_version":"1.0","source":{"id":"1707.08817","kind":"arxiv","version":2}},"canonical_sha256":"01a9704293ae48b58e686b804cc509bc8c58b7737730f3547e4f42766433994e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"01a9704293ae48b58e686b804cc509bc8c58b7737730f3547e4f42766433994e","first_computed_at":"2026-05-18T00:03:56.929806Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:03:56.929806Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NFTM+TiM2VeaqWZ23ynM7Z+maXOx3JfPaUgPFc6mgG6q3A42EDCyd9xAice1eIsXE22FTm45c9YjFVLP00JCBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:03:56.930530Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.08817","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2e0cf315781a18cbfa271c3a51f1864d0a201277f85c72a65e9728b648a8fb69","sha256:72042d72e528a23dd0605c6590787d8b8ac523b787737f161f12854b179d12ba"],"state_sha256":"ba70c11a46bdd20060ccd42b621466c46b55b1fe1c246d32133a76262329b80c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iAgk+473yFKbMsJUaPRvX2oE1pZpWR/cW+jwjw/3A7pXo1IYaPhvyA3hF7v41eeLKcFuZU4S8Hk475PYcd5XDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T22:29:14.017902Z","bundle_sha256":"e95d8f2c9d72631235f434eadada4afaac9ceaad98cb090b9e76c8f5dafc6e17"}}