{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:M7SRINVXCSMTIQUG6LEOSN6BZR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"34e5cf21c41c9ed9cfb71aafd3c5c9d735e9f61275952a95054d4660026385dd","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T12:13:08Z","title_canon_sha256":"0af9546d8e8d9886ae5de83c16817341ab94c70cf83391e2feeddffdeb29b98e"},"schema_version":"1.0","source":{"id":"2605.14746","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14746","created_at":"2026-05-17T23:38:58Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14746v1","created_at":"2026-05-17T23:38:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14746","created_at":"2026-05-17T23:38:58Z"},{"alias_kind":"pith_short_12","alias_value":"M7SRINVXCSMT","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"M7SRINVXCSMTIQUG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"M7SRINVX","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:78b4f845f63dfe4eddffd5b2554e49209c1cbe6bd2d357e063079e2bb5583a1f","target":"graph","created_at":"2026-05-17T23:38:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"While large language models (LLMs) are trained to align with human values, their generations may still violate safety constraints. A growing line of work addresses this problem by modifying the model's sampling policy at decoding time using a safety reward. However, existing decoding-time steering methods often intervene unnecessarily, modifying generations that would have been safe under the base model. Such unnecessary interventions are undesirable, as they can distort key properties of the base model such as helpfulness, fluency, style, and coherence. We propose a new test-time steering met","authors_text":"Bat-Sheva Einbinder, Hen Davidov, Yaniv Romano, Yarin Gal, Yee Whye Teh","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T12:13:08Z","title":"Selective Safety Steering via Value-Filtered Decoding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.14746","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8fb35dd9e642b88afdbd7ae05384c3c064b5808919f0a865c5022ab70bc7782c","target":"record","created_at":"2026-05-17T23:38:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"34e5cf21c41c9ed9cfb71aafd3c5c9d735e9f61275952a95054d4660026385dd","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T12:13:08Z","title_canon_sha256":"0af9546d8e8d9886ae5de83c16817341ab94c70cf83391e2feeddffdeb29b98e"},"schema_version":"1.0","source":{"id":"2605.14746","kind":"arxiv","version":1}},"canonical_sha256":"67e51436b71499344286f2c8e937c1cc62f8e44779f5da0f61618c8b0a9f62f6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"67e51436b71499344286f2c8e937c1cc62f8e44779f5da0f61618c8b0a9f62f6","first_computed_at":"2026-05-17T23:38:58.892736Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:58.892736Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bOq0OITZx65JStIUi09bpt4WoOmSAugtz6Sq89Q0JHKUPCf/ikFpo5YBiho+Fkfpb1Zt1XLPvhYpYGHMcCd8Cg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:58.893393Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14746","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8fb35dd9e642b88afdbd7ae05384c3c064b5808919f0a865c5022ab70bc7782c","sha256:78b4f845f63dfe4eddffd5b2554e49209c1cbe6bd2d357e063079e2bb5583a1f"],"state_sha256":"778f07e397b0f4b768a0a0c1d789e37689b9b612c1dc6f719dd0f126e5f33086"}