{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:R3GMT45HQH3OCEAGEYQ6WB2O4O","short_pith_number":"pith:R3GMT45H","canonical_record":{"source":{"id":"1812.05027","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-12-12T16:56:51Z","cross_cats_sorted":[],"title_canon_sha256":"90a22bea9086b5d24edeba665e2157766a6a412d22b639e2bcfc4f5ea1a8c1d0","abstract_canon_sha256":"d52dbc997e46f61b7bdc6c183ac3efef10dc54adfabb3a47270721929eb4c02f"},"schema_version":"1.0"},"canonical_sha256":"8eccc9f3a781f6e110062621eb074ee3a7dda6e1e35a293391f7c0e02ead09c7","source":{"kind":"arxiv","id":"1812.05027","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.05027","created_at":"2026-05-17T23:58:26Z"},{"alias_kind":"arxiv_version","alias_value":"1812.05027v1","created_at":"2026-05-17T23:58:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.05027","created_at":"2026-05-17T23:58:26Z"},{"alias_kind":"pith_short_12","alias_value":"R3GMT45HQH3O","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"R3GMT45HQH3OCEAG","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"R3GMT45H","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:R3GMT45HQH3OCEAGEYQ6WB2O4O","target":"record","payload":{"canonical_record":{"source":{"id":"1812.05027","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-12-12T16:56:51Z","cross_cats_sorted":[],"title_canon_sha256":"90a22bea9086b5d24edeba665e2157766a6a412d22b639e2bcfc4f5ea1a8c1d0","abstract_canon_sha256":"d52dbc997e46f61b7bdc6c183ac3efef10dc54adfabb3a47270721929eb4c02f"},"schema_version":"1.0"},"canonical_sha256":"8eccc9f3a781f6e110062621eb074ee3a7dda6e1e35a293391f7c0e02ead09c7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:58:26.570755Z","signature_b64":"ImKQiErRbpwawU3EC7rjjGrN0/ZFiWX+nIJ95pLb3+sACN8Rwmdlz8CFXQBnRd310J7M2uiTf1ndLcoIIAOgAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8eccc9f3a781f6e110062621eb074ee3a7dda6e1e35a293391f7c0e02ead09c7","last_reissued_at":"2026-05-17T23:58:26.570037Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:58:26.570037Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.05027","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:58:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6Mngu++RnHkMLX0MjKf2zmG/b0jyYgQaNxy98aVP4/c6p1GIdhIS43vVxFRsts438GyKyzbLBmQKNHLIlmGWCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:57:49.757275Z"},"content_sha256":"299e145aa8b1006b1ec461cc85c044e60af00f38b3297a135ba0cd832647e217","schema_version":"1.0","event_id":"sha256:299e145aa8b1006b1ec461cc85c044e60af00f38b3297a135ba0cd832647e217"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:R3GMT45HQH3OCEAGEYQ6WB2O4O","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning with Training Wheels: Speeding up Training with a Simple Controller for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Andrew Markham, Linhai Xie, Niki Trigoni, Sen Wang, Stefano Rosa","submitted_at":"2018-12-12T16:56:51Z","abstract_excerpt":"Deep Reinforcement Learning (DRL) has been applied successfully to many robotic applications. However, the large number of trials needed for training is a key issue. Most of existing techniques developed to improve training efficiency (e.g. imitation) target on general tasks rather than being tailored for robot applications, which have their specific context to benefit from. We propose a novel framework, Assisted Reinforcement Learning, where a classical controller (e.g. a PID controller) is used as an alternative, switchable policy to speed up training of DRL for local planning and navigation"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.05027","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:58:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XMt97VnZPYzEsZqntPmHd2oxD7jJvOtkZTvi3V4OTowFEjreD8byLwFpHBK6SE1lQCsEt27tuAyGXqiWlUi+Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:57:49.757921Z"},"content_sha256":"9af98ad3ba003fe72ed7039ffdd98c6d02a51725aa00a424b70f6305dddc39a1","schema_version":"1.0","event_id":"sha256:9af98ad3ba003fe72ed7039ffdd98c6d02a51725aa00a424b70f6305dddc39a1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O/bundle.json","state_url":"https://pith.science/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T15:57:49Z","links":{"resolver":"https://pith.science/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O","bundle":"https://pith.science/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O/bundle.json","state":"https://pith.science/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R3GMT45HQH3OCEAGEYQ6WB2O4O/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:R3GMT45HQH3OCEAGEYQ6WB2O4O","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d52dbc997e46f61b7bdc6c183ac3efef10dc54adfabb3a47270721929eb4c02f","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-12-12T16:56:51Z","title_canon_sha256":"90a22bea9086b5d24edeba665e2157766a6a412d22b639e2bcfc4f5ea1a8c1d0"},"schema_version":"1.0","source":{"id":"1812.05027","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.05027","created_at":"2026-05-17T23:58:26Z"},{"alias_kind":"arxiv_version","alias_value":"1812.05027v1","created_at":"2026-05-17T23:58:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.05027","created_at":"2026-05-17T23:58:26Z"},{"alias_kind":"pith_short_12","alias_value":"R3GMT45HQH3O","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"R3GMT45HQH3OCEAG","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"R3GMT45H","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:9af98ad3ba003fe72ed7039ffdd98c6d02a51725aa00a424b70f6305dddc39a1","target":"graph","created_at":"2026-05-17T23:58:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep Reinforcement Learning (DRL) has been applied successfully to many robotic applications. However, the large number of trials needed for training is a key issue. Most of existing techniques developed to improve training efficiency (e.g. imitation) target on general tasks rather than being tailored for robot applications, which have their specific context to benefit from. We propose a novel framework, Assisted Reinforcement Learning, where a classical controller (e.g. a PID controller) is used as an alternative, switchable policy to speed up training of DRL for local planning and navigation","authors_text":"Andrew Markham, Linhai Xie, Niki Trigoni, Sen Wang, Stefano Rosa","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-12-12T16:56:51Z","title":"Learning with Training Wheels: Speeding up Training with a Simple Controller for Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.05027","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:299e145aa8b1006b1ec461cc85c044e60af00f38b3297a135ba0cd832647e217","target":"record","created_at":"2026-05-17T23:58:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d52dbc997e46f61b7bdc6c183ac3efef10dc54adfabb3a47270721929eb4c02f","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-12-12T16:56:51Z","title_canon_sha256":"90a22bea9086b5d24edeba665e2157766a6a412d22b639e2bcfc4f5ea1a8c1d0"},"schema_version":"1.0","source":{"id":"1812.05027","kind":"arxiv","version":1}},"canonical_sha256":"8eccc9f3a781f6e110062621eb074ee3a7dda6e1e35a293391f7c0e02ead09c7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8eccc9f3a781f6e110062621eb074ee3a7dda6e1e35a293391f7c0e02ead09c7","first_computed_at":"2026-05-17T23:58:26.570037Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:58:26.570037Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ImKQiErRbpwawU3EC7rjjGrN0/ZFiWX+nIJ95pLb3+sACN8Rwmdlz8CFXQBnRd310J7M2uiTf1ndLcoIIAOgAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:58:26.570755Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.05027","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:299e145aa8b1006b1ec461cc85c044e60af00f38b3297a135ba0cd832647e217","sha256:9af98ad3ba003fe72ed7039ffdd98c6d02a51725aa00a424b70f6305dddc39a1"],"state_sha256":"5a0a339f9f56b33da71848fab2768b819dac3a39c2ad53153f467853f66d1799"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Iu6Gv8ZPdVTmPatj5b5fDtb8Xrw2K47+uFh9LCSApKeJmfZMwJQyaiWiTHB/kmvZ41iJi/ZMPT5BRmCg+G6NAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T15:57:49.761305Z","bundle_sha256":"a3f0f72c32b463b06f525fd158694acbb40ac4c2d1920398d1c10ec0d5b2135f"}}