{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:MPD2SVSYKGZXNSA57D2RGJVLRP","short_pith_number":"pith:MPD2SVSY","schema_version":"1.0","canonical_sha256":"63c7a9565851b376c81df8f51326ab8bfa5be70f9ea45f1097bc8942ad65d9d2","source":{"kind":"arxiv","id":"1702.01182","version":1},"attestation_state":"computed","paper":{"title":"Uncertainty-Aware Reinforcement Learning for Collision Avoidance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.LG","authors_text":"Adam Villaflor, Gregory Kahn, Pieter Abbeel, Sergey Levine, Vitchyr Pong","submitted_at":"2017-02-03T21:57:13Z","abstract_excerpt":"Reinforcement learning can enable complex, adaptive behavior to be learned automatically for autonomous robotic platforms. However, practical deployment of reinforcement learning methods must contend with the fact that the training process itself can be unsafe for the robot. In this paper, we consider the specific case of a mobile robot learning to navigate an a priori unknown environment while avoiding collisions. In order to learn collision avoidance, the robot must experience collisions at training time. However, high-speed collisions, even at training time, could damage the robot. A succes"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1702.01182","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-02-03T21:57:13Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"ff1daf310a55c5a77a382e56e9457ce45df84a51bff9c18bf98ac1f49eb2e306","abstract_canon_sha256":"8d357cbc896d3e856233ec34a846032d2b644e0e00fa952570e0749127948482"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:51:24.279696Z","signature_b64":"PltnyLi4Pd5UXQZCRV7uG+pngU7/tfjy1u2MDyGtYkeoNQ0ktxJsULjurmXGCqXXpgb1MBsqoSi5KAJiMyhfBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"63c7a9565851b376c81df8f51326ab8bfa5be70f9ea45f1097bc8942ad65d9d2","last_reissued_at":"2026-05-18T00:51:24.279162Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:51:24.279162Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Uncertainty-Aware Reinforcement Learning for Collision Avoidance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.LG","authors_text":"Adam Villaflor, Gregory Kahn, Pieter Abbeel, Sergey Levine, Vitchyr Pong","submitted_at":"2017-02-03T21:57:13Z","abstract_excerpt":"Reinforcement learning can enable complex, adaptive behavior to be learned automatically for autonomous robotic platforms. However, practical deployment of reinforcement learning methods must contend with the fact that the training process itself can be unsafe for the robot. In this paper, we consider the specific case of a mobile robot learning to navigate an a priori unknown environment while avoiding collisions. In order to learn collision avoidance, the robot must experience collisions at training time. However, high-speed collisions, even at training time, could damage the robot. A succes"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.01182","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1702.01182","created_at":"2026-05-18T00:51:24.279249+00:00"},{"alias_kind":"arxiv_version","alias_value":"1702.01182v1","created_at":"2026-05-18T00:51:24.279249+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.01182","created_at":"2026-05-18T00:51:24.279249+00:00"},{"alias_kind":"pith_short_12","alias_value":"MPD2SVSYKGZX","created_at":"2026-05-18T12:31:31.346846+00:00"},{"alias_kind":"pith_short_16","alias_value":"MPD2SVSYKGZXNSA5","created_at":"2026-05-18T12:31:31.346846+00:00"},{"alias_kind":"pith_short_8","alias_value":"MPD2SVSY","created_at":"2026-05-18T12:31:31.346846+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"1907.00456","citing_title":"Way Off-Policy Batch Deep Reinforcement Learning of Implicit Human Preferences in Dialog","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"1907.01475","citing_title":"Generalizing from a few environments in safety-critical reinforcement learning","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2503.03480","citing_title":"SafeVLA: Towards Safety Alignment of Vision-Language-Action Model via Constrained Learning","ref_index":95,"is_internal_anchor":true},{"citing_arxiv_id":"2510.24680","citing_title":"InFeR: Informed Failure Resilience in Learned Visual Navigation Control","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09183","citing_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","ref_index":62,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10289","citing_title":"Sample-Mean Anchored Thompson Sampling for Offline-to-Online Learning with Distribution Shift","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09183","citing_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","ref_index":62,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10289","citing_title":"Sample-Mean Anchored Thompson Sampling for Offline-to-Online Learning with Distribution Shift","ref_index":34,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP","json":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP.json","graph_json":"https://pith.science/api/pith-number/MPD2SVSYKGZXNSA57D2RGJVLRP/graph.json","events_json":"https://pith.science/api/pith-number/MPD2SVSYKGZXNSA57D2RGJVLRP/events.json","paper":"https://pith.science/paper/MPD2SVSY"},"agent_actions":{"view_html":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP","download_json":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP.json","view_paper":"https://pith.science/paper/MPD2SVSY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1702.01182&json=true","fetch_graph":"https://pith.science/api/pith-number/MPD2SVSYKGZXNSA57D2RGJVLRP/graph.json","fetch_events":"https://pith.science/api/pith-number/MPD2SVSYKGZXNSA57D2RGJVLRP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP/action/storage_attestation","attest_author":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP/action/author_attestation","sign_citation":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP/action/citation_signature","submit_replication":"https://pith.science/pith/MPD2SVSYKGZXNSA57D2RGJVLRP/action/replication_record"}},"created_at":"2026-05-18T00:51:24.279249+00:00","updated_at":"2026-05-18T00:51:24.279249+00:00"}