{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:VOUFM4ABTJCP243EE2DITG5YY5","short_pith_number":"pith:VOUFM4AB","schema_version":"1.0","canonical_sha256":"aba85670019a44fd73642686899bb8c74379d631683a20a332ac5a5fc6fd1099","source":{"kind":"arxiv","id":"2510.14959","version":6},"attestation_state":"computed","paper":{"title":"CBF-RL: Safety Filtering Reinforcement Learning in Training with Control Barrier Functions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.SY","eess.SY"],"primary_cat":"cs.RO","authors_text":"Aaron D. Ames, Blake Werner, Lizhi Yang, Massimiliano de Sa","submitted_at":"2025-10-16T17:58:58Z","abstract_excerpt":"Reinforcement learning (RL), while powerful and expressive, can often prioritize performance at the expense of safety. Yet safety violations can lead to catastrophic outcomes in real-world deployments. Control Barrier Functions (CBFs) offer a principled method to enforce dynamic safety -- traditionally deployed online via safety filters. While the result is safe behavior, the fact that the RL policy does not have knowledge of the CBF can lead to conservative behaviors. This paper proposes CBF-RL, a framework for generating safe behaviors with RL by enforcing CBFs in training. CBF-RL has two ke"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.14959","kind":"arxiv","version":6},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-10-16T17:58:58Z","cross_cats_sorted":["cs.AI","cs.LG","cs.SY","eess.SY"],"title_canon_sha256":"2ea5aa27022ff541e7281c3bcccfa007bccfe5f9a95e0900341923e3800fa209","abstract_canon_sha256":"0e698bd4149e89b9068b50956dffa796b8b828a897f4ffb64c39dc7abac5800c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:13:48.895633Z","signature_b64":"79fbgVaOHKP+1U+BRfgS8QS87BJ6zwvyj3PVCqGysUDcQBQ2Tb7sMwMlSX6NBxOcn89L/5EgZTL4Vq19qlrDCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aba85670019a44fd73642686899bb8c74379d631683a20a332ac5a5fc6fd1099","last_reissued_at":"2026-06-23T03:13:48.895217Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:13:48.895217Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CBF-RL: Safety Filtering Reinforcement Learning in Training with Control Barrier Functions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.SY","eess.SY"],"primary_cat":"cs.RO","authors_text":"Aaron D. Ames, Blake Werner, Lizhi Yang, Massimiliano de Sa","submitted_at":"2025-10-16T17:58:58Z","abstract_excerpt":"Reinforcement learning (RL), while powerful and expressive, can often prioritize performance at the expense of safety. Yet safety violations can lead to catastrophic outcomes in real-world deployments. Control Barrier Functions (CBFs) offer a principled method to enforce dynamic safety -- traditionally deployed online via safety filters. While the result is safe behavior, the fact that the RL policy does not have knowledge of the CBF can lead to conservative behaviors. This paper proposes CBF-RL, a framework for generating safe behaviors with RL by enforcing CBFs in training. CBF-RL has two ke"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.14959","kind":"arxiv","version":6},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.14959/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.14959","created_at":"2026-06-23T03:13:48.895274+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.14959v6","created_at":"2026-06-23T03:13:48.895274+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.14959","created_at":"2026-06-23T03:13:48.895274+00:00"},{"alias_kind":"pith_short_12","alias_value":"VOUFM4ABTJCP","created_at":"2026-06-23T03:13:48.895274+00:00"},{"alias_kind":"pith_short_16","alias_value":"VOUFM4ABTJCP243E","created_at":"2026-06-23T03:13:48.895274+00:00"},{"alias_kind":"pith_short_8","alias_value":"VOUFM4AB","created_at":"2026-06-23T03:13:48.895274+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"2606.02027","citing_title":"World-Task Factorization for Robot Learning","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2605.31310","citing_title":"Model-free LQG Control with Chance Constraints","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2606.00374","citing_title":"Constrained Whole-Body Tracking for Humanoid Robots","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.25546","citing_title":"Safety-Critical Whole-Body Control for Humanoid Robots via Input-to-State Safe Control Barrier Functions","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2606.23686","citing_title":"LIBERO-Safety: A Comprehensive Benchmark for Physical and Semantic Safety in Vision-Language-Action Models","ref_index":50,"is_internal_anchor":true},{"citing_arxiv_id":"2512.10118","citing_title":"Explicit Control Barrier Function-based Safety Filters and their Resource-Aware Computation","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09824","citing_title":"Geometric Pareto Control: Riemannian Gradient Flow of Energy Function via Lie Group Homotopy","ref_index":35,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5","json":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5.json","graph_json":"https://pith.science/api/pith-number/VOUFM4ABTJCP243EE2DITG5YY5/graph.json","events_json":"https://pith.science/api/pith-number/VOUFM4ABTJCP243EE2DITG5YY5/events.json","paper":"https://pith.science/paper/VOUFM4AB"},"agent_actions":{"view_html":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5","download_json":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5.json","view_paper":"https://pith.science/paper/VOUFM4AB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.14959&json=true","fetch_graph":"https://pith.science/api/pith-number/VOUFM4ABTJCP243EE2DITG5YY5/graph.json","fetch_events":"https://pith.science/api/pith-number/VOUFM4ABTJCP243EE2DITG5YY5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5/action/storage_attestation","attest_author":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5/action/author_attestation","sign_citation":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5/action/citation_signature","submit_replication":"https://pith.science/pith/VOUFM4ABTJCP243EE2DITG5YY5/action/replication_record"}},"created_at":"2026-06-23T03:13:48.895274+00:00","updated_at":"2026-06-23T03:13:48.895274+00:00"}