{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:V4DWYQEJEF4FHSG6QMAP5PU4KZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a450899c7d70ddca13647262070e5c90a1989b95c9a9a281b3deee90def965a3","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-07-02T08:17:57Z","title_canon_sha256":"2b7c5c6a10e6d305504f33c41f26de4235e5f638c55e96c6ac2e2b1d6cd4a6bf"},"schema_version":"1.0","source":{"id":"2607.01859","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.01859","created_at":"2026-07-03T01:17:31Z"},{"alias_kind":"arxiv_version","alias_value":"2607.01859v1","created_at":"2026-07-03T01:17:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.01859","created_at":"2026-07-03T01:17:31Z"},{"alias_kind":"pith_short_12","alias_value":"V4DWYQEJEF4F","created_at":"2026-07-03T01:17:31Z"},{"alias_kind":"pith_short_16","alias_value":"V4DWYQEJEF4FHSG6","created_at":"2026-07-03T01:17:31Z"},{"alias_kind":"pith_short_8","alias_value":"V4DWYQEJ","created_at":"2026-07-03T01:17:31Z"}],"graph_snapshots":[{"event_id":"sha256:f4f7a49e4133d2ab5d80e7e7bb1e88d8315e991f5a30d49e4a056110c336aa2d","target":"graph","created_at":"2026-07-03T01:17:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.01859/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Safety training for large language models (LLMs) is conducted predominantly in English, leaving uncertain how well safety mechanisms generalize to low-resource languages and mixed-language code-switching. We show that this creates an epistemic gap in which models confidently generate harmful responses for inputs that fall outside the distribution of their safety training. To study this phenomenon, we introduce STEER (Safety Targeted Embedding Exploit via Refinement), a gradient-guided attack that identifies words contributing most strongly to the model's refusal behavior and iteratively transl","authors_text":"Joshua Adrian Cahyono","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-07-02T08:17:57Z","title":"Safety Targeted Embedding Exploit via Refinement"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.01859","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:eb257f8271f9ee994c6cad51d70fa423f3049d6cfad4af754ef9840d597e7ad8","target":"record","created_at":"2026-07-03T01:17:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a450899c7d70ddca13647262070e5c90a1989b95c9a9a281b3deee90def965a3","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-07-02T08:17:57Z","title_canon_sha256":"2b7c5c6a10e6d305504f33c41f26de4235e5f638c55e96c6ac2e2b1d6cd4a6bf"},"schema_version":"1.0","source":{"id":"2607.01859","kind":"arxiv","version":1}},"canonical_sha256":"af076c4089217853c8de8300febe9c565079391cab672b0a12314d2b5213ed3d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"af076c4089217853c8de8300febe9c565079391cab672b0a12314d2b5213ed3d","first_computed_at":"2026-07-03T01:17:31.815175Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T01:17:31.815175Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"52R+gzQvRk7WwRWsIMLV4c2CkPB29fnkoMEyprVzpZUlXn/yG1yN7Idiv+/WlSbPHHBJCp8tTRTwIOTgkovjCA==","signature_status":"signed_v1","signed_at":"2026-07-03T01:17:31.815533Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.01859","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:eb257f8271f9ee994c6cad51d70fa423f3049d6cfad4af754ef9840d597e7ad8","sha256:f4f7a49e4133d2ab5d80e7e7bb1e88d8315e991f5a30d49e4a056110c336aa2d"],"state_sha256":"1493ba99c179be024ce5fa1aa5e464070be3c28accf5bde4a503e462b60ceb45"}