{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:652VCRXKJXDSDVGPVKI3EHWIFB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a25d4012c7c28c55530ed0ac4741138e790faa1f7d44e5177250d02bd4fe9b85","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T23:44:06Z","title_canon_sha256":"16c55846a121202d4be451aa5df9354a17cdd960b15ca606e41dcd6f0e67836f"},"schema_version":"1.0","source":{"id":"2605.21822","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21822","created_at":"2026-05-22T01:04:09Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21822v1","created_at":"2026-05-22T01:04:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21822","created_at":"2026-05-22T01:04:09Z"},{"alias_kind":"pith_short_12","alias_value":"652VCRXKJXDS","created_at":"2026-05-22T01:04:09Z"},{"alias_kind":"pith_short_16","alias_value":"652VCRXKJXDSDVGP","created_at":"2026-05-22T01:04:09Z"},{"alias_kind":"pith_short_8","alias_value":"652VCRXK","created_at":"2026-05-22T01:04:09Z"}],"graph_snapshots":[{"event_id":"sha256:41ba070aa2e072b02d6cf0c24a3b21db02d9efb0c68eba5ad4aa5a7106124b24","target":"graph","created_at":"2026-05-22T01:04:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.21822/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement Learning from Human Feedback (RLHF) can reveal implicit objectives such as safety considerations that go beyond task completion. In this work, we focus on the common safety criteria embedded in crowd preference datasets, where different users may express distinct preferences or objectives, yet follow similar safety principles. Our aim is to discover shared safety criteria from crowd preferences and then transfer them to downstream RL tasks to regularize agent behavior and enforce safety. We first show that direct reward combination-optimizing a preference-learned reward model tog","authors_text":"Daniel S. Brown, Qian Lin","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T23:44:06Z","title":"Implicit Safety Alignment from Crowd Preferences"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21822","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e909f428a35202a1f7b65d01ef8ced9eb35fa3420a6e9b7e321a3e66f9e40d1f","target":"record","created_at":"2026-05-22T01:04:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a25d4012c7c28c55530ed0ac4741138e790faa1f7d44e5177250d02bd4fe9b85","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T23:44:06Z","title_canon_sha256":"16c55846a121202d4be451aa5df9354a17cdd960b15ca606e41dcd6f0e67836f"},"schema_version":"1.0","source":{"id":"2605.21822","kind":"arxiv","version":1}},"canonical_sha256":"f7755146ea4dc721d4cfaa91b21ec82868468a5ba99f0a285fc66eac31f3464a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f7755146ea4dc721d4cfaa91b21ec82868468a5ba99f0a285fc66eac31f3464a","first_computed_at":"2026-05-22T01:04:09.469890Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:09.469890Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"7F6IoUJBM6AVjvFSX5JHizzBPO/Z5kM3cwfMkXBFtncIilsteS/2Fpz5UnlbfMOVSV7uUqq5wYtky8spZxRUAQ==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:09.470548Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.21822","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e909f428a35202a1f7b65d01ef8ced9eb35fa3420a6e9b7e321a3e66f9e40d1f","sha256:41ba070aa2e072b02d6cf0c24a3b21db02d9efb0c68eba5ad4aa5a7106124b24"],"state_sha256":"495f6c9b059b67f777d66802112b81bbceab9cdd8d644cbb24895d226077f6d2"}