{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LTIDCONDQCV35B3NKD2ORS3235","short_pith_number":"pith:LTIDCOND","schema_version":"1.0","canonical_sha256":"5cd03139a380abbe876d50f4e8cb7adf6c4d74a48b0404c6454d6484cb6d96f6","source":{"kind":"arxiv","id":"2603.23565","version":2},"attestation_state":"computed","paper":{"title":"Safe Reinforcement Learning with Preference-based Constraint Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chenglin Li, Grant Ruan, Hua Geng","submitted_at":"2026-03-24T08:54:03Z","abstract_excerpt":"Safe reinforcement learning (RL) is a standard paradigm for safety-critical decision making. However, real-world safety constraints can be complex, subjective, and even hard to explicitly specify. Existing works on constraint inference rely on restrictive assumptions or extensive expert demonstrations, which are not realistic in many real-world applications. How to cheaply and reliably learn these constraints is the major challenge we focus on in this study. While inferring constraints from human preferences offers a data-efficient alternative, we identify popular Bradley-Terry (BT) models fai"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.23565","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T08:54:03Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"aafda2297fe065d17df490f189561530f14edd01d2f5cab04c2e3d0fa6bd7c06","abstract_canon_sha256":"efc33254da412a686d548d96990dfc77585c77a5d09791c261ada1bee5a4b1cc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:02:14.692248Z","signature_b64":"O7rTuA//g1HJDEZq2q+IsZusW2WuwJTl9sjrceY5ygKIB9z64W9lFc3BXiCei0kq6d9DF7awuP5XwLXuqnDUBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5cd03139a380abbe876d50f4e8cb7adf6c4d74a48b0404c6454d6484cb6d96f6","last_reissued_at":"2026-05-25T02:02:14.691406Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:02:14.691406Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Safe Reinforcement Learning with Preference-based Constraint Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chenglin Li, Grant Ruan, Hua Geng","submitted_at":"2026-03-24T08:54:03Z","abstract_excerpt":"Safe reinforcement learning (RL) is a standard paradigm for safety-critical decision making. However, real-world safety constraints can be complex, subjective, and even hard to explicitly specify. Existing works on constraint inference rely on restrictive assumptions or extensive expert demonstrations, which are not realistic in many real-world applications. How to cheaply and reliably learn these constraints is the major challenge we focus on in this study. While inferring constraints from human preferences offers a data-efficient alternative, we identify popular Bradley-Terry (BT) models fai"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.23565","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.23565/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.23565","created_at":"2026-05-25T02:02:14.691506+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.23565v2","created_at":"2026-05-25T02:02:14.691506+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.23565","created_at":"2026-05-25T02:02:14.691506+00:00"},{"alias_kind":"pith_short_12","alias_value":"LTIDCONDQCV3","created_at":"2026-05-25T02:02:14.691506+00:00"},{"alias_kind":"pith_short_16","alias_value":"LTIDCONDQCV35B3N","created_at":"2026-05-25T02:02:14.691506+00:00"},{"alias_kind":"pith_short_8","alias_value":"LTIDCOND","created_at":"2026-05-25T02:02:14.691506+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235","json":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235.json","graph_json":"https://pith.science/api/pith-number/LTIDCONDQCV35B3NKD2ORS3235/graph.json","events_json":"https://pith.science/api/pith-number/LTIDCONDQCV35B3NKD2ORS3235/events.json","paper":"https://pith.science/paper/LTIDCOND"},"agent_actions":{"view_html":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235","download_json":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235.json","view_paper":"https://pith.science/paper/LTIDCOND","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.23565&json=true","fetch_graph":"https://pith.science/api/pith-number/LTIDCONDQCV35B3NKD2ORS3235/graph.json","fetch_events":"https://pith.science/api/pith-number/LTIDCONDQCV35B3NKD2ORS3235/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235/action/storage_attestation","attest_author":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235/action/author_attestation","sign_citation":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235/action/citation_signature","submit_replication":"https://pith.science/pith/LTIDCONDQCV35B3NKD2ORS3235/action/replication_record"}},"created_at":"2026-05-25T02:02:14.691506+00:00","updated_at":"2026-05-25T02:02:14.691506+00:00"}