{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:FTLQPOMQPDEB2WENY3GRZFNLOU","short_pith_number":"pith:FTLQPOMQ","canonical_record":{"source":{"id":"1903.08542","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-20T15:07:53Z","cross_cats_sorted":[],"title_canon_sha256":"275c617820ef95205e948ba0890c7e4792cf3a6d69af2a3e9bf7170a7486835c","abstract_canon_sha256":"bb9f873a0eae2b8ad0a12da7f528f0a96ac726e62edf52d7c631bfc0886cda11"},"schema_version":"1.0"},"canonical_sha256":"2cd707b99078c81d588dc6cd1c95ab750f9bb4f09a33a2a7ebe4b568c292b695","source":{"kind":"arxiv","id":"1903.08542","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.08542","created_at":"2026-05-17T23:50:47Z"},{"alias_kind":"arxiv_version","alias_value":"1903.08542v1","created_at":"2026-05-17T23:50:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.08542","created_at":"2026-05-17T23:50:47Z"},{"alias_kind":"pith_short_12","alias_value":"FTLQPOMQPDEB","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"FTLQPOMQPDEB2WEN","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"FTLQPOMQ","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:FTLQPOMQPDEB2WENY3GRZFNLOU","target":"record","payload":{"canonical_record":{"source":{"id":"1903.08542","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-20T15:07:53Z","cross_cats_sorted":[],"title_canon_sha256":"275c617820ef95205e948ba0890c7e4792cf3a6d69af2a3e9bf7170a7486835c","abstract_canon_sha256":"bb9f873a0eae2b8ad0a12da7f528f0a96ac726e62edf52d7c631bfc0886cda11"},"schema_version":"1.0"},"canonical_sha256":"2cd707b99078c81d588dc6cd1c95ab750f9bb4f09a33a2a7ebe4b568c292b695","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:47.814219Z","signature_b64":"bdTqcP2HDKY0L3Mm13Yg/kaXOtY03TUx2Msyn7+DKuojL4e4Yr+M93z802w1oa82IdCBzCQQVLihVDsXqGSIAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2cd707b99078c81d588dc6cd1c95ab750f9bb4f09a33a2a7ebe4b568c292b695","last_reissued_at":"2026-05-17T23:50:47.813662Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:47.813662Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.08542","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"x/d70bpIkXFhFEiRfOVyLUpjSM7RcTp/u++CKG3WW3MrUydUcF64YgBCa/huBS2MBbNoqRvN1wWQRtxKRIi+Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T20:20:31.755671Z"},"content_sha256":"62f0f91f82ac696e8556acb98ab83ef342eb9bcde5e19709df207fb5e09ed2b9","schema_version":"1.0","event_id":"sha256:62f0f91f82ac696e8556acb98ab83ef342eb9bcde5e19709df207fb5e09ed2b9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:FTLQPOMQPDEB2WENY3GRZFNLOU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Gentle Object Manipulation with Curiosity-Driven Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Jackie Kay, Martina Zambelli, Murilo F. Martins, Patrick M. Pilarski, Raia Hadsell, Sandy H. Huang, Yuval Tassa","submitted_at":"2019-03-20T15:07:53Z","abstract_excerpt":"Robots must know how to be gentle when they need to interact with fragile objects, or when the robot itself is prone to wear and tear. We propose an approach that enables deep reinforcement learning to train policies that are gentle, both during exploration and task execution. In a reward-based learning environment, a natural approach involves augmenting the (task) reward with a penalty for non-gentleness, which can be defined as excessive impact force. However, augmenting with only this penalty impairs learning: policies get stuck in a local optimum which avoids all contact with the environme"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.08542","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HVQ4EATl0sMPb0GzAavb5tYXXlzKqbh7U2Y8haKzmbZ1OzPjv33VPcgC1LSOCCrVbM+3Zjoemf4ijSiVX1u6Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T20:20:31.756032Z"},"content_sha256":"9a5e2260aa676a02b73900505b8ea9485ca1f32e8e59932a705a18fe5b5b941e","schema_version":"1.0","event_id":"sha256:9a5e2260aa676a02b73900505b8ea9485ca1f32e8e59932a705a18fe5b5b941e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FTLQPOMQPDEB2WENY3GRZFNLOU/bundle.json","state_url":"https://pith.science/pith/FTLQPOMQPDEB2WENY3GRZFNLOU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FTLQPOMQPDEB2WENY3GRZFNLOU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T20:20:31Z","links":{"resolver":"https://pith.science/pith/FTLQPOMQPDEB2WENY3GRZFNLOU","bundle":"https://pith.science/pith/FTLQPOMQPDEB2WENY3GRZFNLOU/bundle.json","state":"https://pith.science/pith/FTLQPOMQPDEB2WENY3GRZFNLOU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FTLQPOMQPDEB2WENY3GRZFNLOU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:FTLQPOMQPDEB2WENY3GRZFNLOU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bb9f873a0eae2b8ad0a12da7f528f0a96ac726e62edf52d7c631bfc0886cda11","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-20T15:07:53Z","title_canon_sha256":"275c617820ef95205e948ba0890c7e4792cf3a6d69af2a3e9bf7170a7486835c"},"schema_version":"1.0","source":{"id":"1903.08542","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.08542","created_at":"2026-05-17T23:50:47Z"},{"alias_kind":"arxiv_version","alias_value":"1903.08542v1","created_at":"2026-05-17T23:50:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.08542","created_at":"2026-05-17T23:50:47Z"},{"alias_kind":"pith_short_12","alias_value":"FTLQPOMQPDEB","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"FTLQPOMQPDEB2WEN","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"FTLQPOMQ","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:9a5e2260aa676a02b73900505b8ea9485ca1f32e8e59932a705a18fe5b5b941e","target":"graph","created_at":"2026-05-17T23:50:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Robots must know how to be gentle when they need to interact with fragile objects, or when the robot itself is prone to wear and tear. We propose an approach that enables deep reinforcement learning to train policies that are gentle, both during exploration and task execution. In a reward-based learning environment, a natural approach involves augmenting the (task) reward with a penalty for non-gentleness, which can be defined as excessive impact force. However, augmenting with only this penalty impairs learning: policies get stuck in a local optimum which avoids all contact with the environme","authors_text":"Jackie Kay, Martina Zambelli, Murilo F. Martins, Patrick M. Pilarski, Raia Hadsell, Sandy H. Huang, Yuval Tassa","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-20T15:07:53Z","title":"Learning Gentle Object Manipulation with Curiosity-Driven Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.08542","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:62f0f91f82ac696e8556acb98ab83ef342eb9bcde5e19709df207fb5e09ed2b9","target":"record","created_at":"2026-05-17T23:50:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bb9f873a0eae2b8ad0a12da7f528f0a96ac726e62edf52d7c631bfc0886cda11","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-20T15:07:53Z","title_canon_sha256":"275c617820ef95205e948ba0890c7e4792cf3a6d69af2a3e9bf7170a7486835c"},"schema_version":"1.0","source":{"id":"1903.08542","kind":"arxiv","version":1}},"canonical_sha256":"2cd707b99078c81d588dc6cd1c95ab750f9bb4f09a33a2a7ebe4b568c292b695","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2cd707b99078c81d588dc6cd1c95ab750f9bb4f09a33a2a7ebe4b568c292b695","first_computed_at":"2026-05-17T23:50:47.813662Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:47.813662Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bdTqcP2HDKY0L3Mm13Yg/kaXOtY03TUx2Msyn7+DKuojL4e4Yr+M93z802w1oa82IdCBzCQQVLihVDsXqGSIAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:47.814219Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.08542","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:62f0f91f82ac696e8556acb98ab83ef342eb9bcde5e19709df207fb5e09ed2b9","sha256:9a5e2260aa676a02b73900505b8ea9485ca1f32e8e59932a705a18fe5b5b941e"],"state_sha256":"867e4050611a3d695f97596ca8304a8f54aec94eeed16bb8fd71c520c59e7423"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WGE7kHLGEHgcJhpQ1qVmpbCtcrkjYNdAXdA7NKclNIfK1yvFg32pNckRtw3ty8EGG2Ov83i06kHsM4gTJi7+Ag==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T20:20:31.757914Z","bundle_sha256":"742eb54d0168d0818eb52e880ce4ec434a6223b324d62084fa3dc19daf7d3e42"}}