{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:DCIKTAISJGBV6X245FJSXIMMJI","short_pith_number":"pith:DCIKTAIS","schema_version":"1.0","canonical_sha256":"1890a9811249835f5f5ce9532ba18c4a2eca884a9041fecfb205645a2de58cc6","source":{"kind":"arxiv","id":"2605.26158","version":1},"attestation_state":"computed","paper":{"title":"Furina: Fragmented Uncertainty-Driven Refusal Instability Attack","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Jian Zhang, Tongxi Wu, Yang Gao","submitted_at":"2026-05-24T11:35:12Z","abstract_excerpt":"Safety alignment in large language models (LLMs) and multimodal large language models (MLLMs) is commonly assumed to operate as a near-binary threshold mechanism. We challenge this assumption by revealing that safety behavior is governed by an instability region where small perturbations induce stochastic refusal decisions rather than deterministic outcomes. We develop a multi-metric diagnostic framework combining external and internal signals to characterize this instability. Through systematic experiments, we identify a characteristic diagnostic signature: inputs in unstable regimes exhibit "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.26158","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-24T11:35:12Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"3b17bf40463dea02c6f1736b779c802b03270046c87bea4ec77372190dfa67b6","abstract_canon_sha256":"bebe29487351ba0fbad8a4983ed78290187843d5ab069d4eb0f562bce8d6fc6d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T00:04:26.736527Z","signature_b64":"TDvtpWhH0xwUNhA8wAhbB9EcSCYxfs33VUmdH8pfexL8ewki7Kq3MJM7ZwUEGU79AZsG/Wm3L1PhwSMTL9/wDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1890a9811249835f5f5ce9532ba18c4a2eca884a9041fecfb205645a2de58cc6","last_reissued_at":"2026-05-27T00:04:26.735819Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T00:04:26.735819Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Furina: Fragmented Uncertainty-Driven Refusal Instability Attack","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Jian Zhang, Tongxi Wu, Yang Gao","submitted_at":"2026-05-24T11:35:12Z","abstract_excerpt":"Safety alignment in large language models (LLMs) and multimodal large language models (MLLMs) is commonly assumed to operate as a near-binary threshold mechanism. We challenge this assumption by revealing that safety behavior is governed by an instability region where small perturbations induce stochastic refusal decisions rather than deterministic outcomes. We develop a multi-metric diagnostic framework combining external and internal signals to characterize this instability. Through systematic experiments, we identify a characteristic diagnostic signature: inputs in unstable regimes exhibit "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26158","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.26158/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.26158","created_at":"2026-05-27T00:04:26.735917+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.26158v1","created_at":"2026-05-27T00:04:26.735917+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26158","created_at":"2026-05-27T00:04:26.735917+00:00"},{"alias_kind":"pith_short_12","alias_value":"DCIKTAISJGBV","created_at":"2026-05-27T00:04:26.735917+00:00"},{"alias_kind":"pith_short_16","alias_value":"DCIKTAISJGBV6X24","created_at":"2026-05-27T00:04:26.735917+00:00"},{"alias_kind":"pith_short_8","alias_value":"DCIKTAIS","created_at":"2026-05-27T00:04:26.735917+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI","json":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI.json","graph_json":"https://pith.science/api/pith-number/DCIKTAISJGBV6X245FJSXIMMJI/graph.json","events_json":"https://pith.science/api/pith-number/DCIKTAISJGBV6X245FJSXIMMJI/events.json","paper":"https://pith.science/paper/DCIKTAIS"},"agent_actions":{"view_html":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI","download_json":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI.json","view_paper":"https://pith.science/paper/DCIKTAIS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.26158&json=true","fetch_graph":"https://pith.science/api/pith-number/DCIKTAISJGBV6X245FJSXIMMJI/graph.json","fetch_events":"https://pith.science/api/pith-number/DCIKTAISJGBV6X245FJSXIMMJI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI/action/storage_attestation","attest_author":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI/action/author_attestation","sign_citation":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI/action/citation_signature","submit_replication":"https://pith.science/pith/DCIKTAISJGBV6X245FJSXIMMJI/action/replication_record"}},"created_at":"2026-05-27T00:04:26.735917+00:00","updated_at":"2026-05-27T00:04:26.735917+00:00"}