{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:JZFZLXG2TXCSLITVX6ODM37JTK","short_pith_number":"pith:JZFZLXG2","canonical_record":{"source":{"id":"2606.28153","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-06-26T14:51:16Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"03341ba9a35e8969cfb512f360ecd35e8f8856350d9608c2d681521706c97720","abstract_canon_sha256":"93bcf11c24b0507a9b7051e96f052555f8b412493cd04b4a50aa960416c6f9ce"},"schema_version":"1.0"},"canonical_sha256":"4e4b95dcda9dc525a275bf9c366fe99a9061477c38868c0d0484de73a76f6c5d","source":{"kind":"arxiv","id":"2606.28153","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28153","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28153v1","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28153","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_12","alias_value":"JZFZLXG2TXCS","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_16","alias_value":"JZFZLXG2TXCSLITV","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_8","alias_value":"JZFZLXG2","created_at":"2026-06-29T01:15:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:JZFZLXG2TXCSLITVX6ODM37JTK","target":"record","payload":{"canonical_record":{"source":{"id":"2606.28153","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-06-26T14:51:16Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"03341ba9a35e8969cfb512f360ecd35e8f8856350d9608c2d681521706c97720","abstract_canon_sha256":"93bcf11c24b0507a9b7051e96f052555f8b412493cd04b4a50aa960416c6f9ce"},"schema_version":"1.0"},"canonical_sha256":"4e4b95dcda9dc525a275bf9c366fe99a9061477c38868c0d0484de73a76f6c5d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-29T01:15:07.587191Z","signature_b64":"HSwS3XS8ujNI15LS7FpsZnX4w5Y2gL7YrHX6AlWKdIfN7G+VB+H+YDm5Mud3SZ7J3D0IcNtaC/zaIClgo5suBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4e4b95dcda9dc525a275bf9c366fe99a9061477c38868c0d0484de73a76f6c5d","last_reissued_at":"2026-06-29T01:15:07.586783Z","signature_status":"signed_v1","first_computed_at":"2026-06-29T01:15:07.586783Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.28153","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-29T01:15:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LDmSlg9YMAa2Qu9dT2AcPmk6HKxSByzcroEoY8T2SK7XNG8rblEs49q3EdVPNn/dvrcZ4tQ+Z6LXn1OspW3BAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T21:23:53.114712Z"},"content_sha256":"f2184af0f37d642f8b991e89ad702194c02c75e2d086e371bfab00c9cb2a5f9e","schema_version":"1.0","event_id":"sha256:f2184af0f37d642f8b991e89ad702194c02c75e2d086e371bfab00c9cb2a5f9e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:JZFZLXG2TXCSLITVX6ODM37JTK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Robust Harmful Features Under Jailbreak Attacks: Mechanistic Evidence from Attention Head Specialization in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Dongqi Han, Linghui Li, Yanchen Yin","submitted_at":"2026-06-26T14:51:16Z","abstract_excerpt":"Jailbreak attacks bypass LLM safety alignment, yet their mechanisms remain poorly understood. We provide evidence that attacks do not comprehensively eliminate safety features, but instead selectively suppress specific attention heads. We identify two functionally differentiated types: Adversarially Compromised Heads (ACHs) concentrated in early layers, which are suppressed under attacks, and Safety-Aligned Heads (SAHs) in mid-layers, which maintain robust activations even when attacks succeed. Ablation studies support the causal role of ACHs and the contribution of SAHs to robust activations:"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28153","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.28153/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-29T01:15:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Dhzf62XC1AsrTymsn7cY6Wjx+xWFwmblqHx2HAi4l9rKT6Sh+DbU3ad0kuYfp/WDkmOSCB9CTA7A8PQH0ZmiAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T21:23:53.115096Z"},"content_sha256":"a0d701b2d0f53cbb89046dd61c3526b4822ddb2bdeeaa0d98b91fe73bf7b6e4f","schema_version":"1.0","event_id":"sha256:a0d701b2d0f53cbb89046dd61c3526b4822ddb2bdeeaa0d98b91fe73bf7b6e4f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JZFZLXG2TXCSLITVX6ODM37JTK/bundle.json","state_url":"https://pith.science/pith/JZFZLXG2TXCSLITVX6ODM37JTK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JZFZLXG2TXCSLITVX6ODM37JTK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T21:23:53Z","links":{"resolver":"https://pith.science/pith/JZFZLXG2TXCSLITVX6ODM37JTK","bundle":"https://pith.science/pith/JZFZLXG2TXCSLITVX6ODM37JTK/bundle.json","state":"https://pith.science/pith/JZFZLXG2TXCSLITVX6ODM37JTK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JZFZLXG2TXCSLITVX6ODM37JTK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:JZFZLXG2TXCSLITVX6ODM37JTK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"93bcf11c24b0507a9b7051e96f052555f8b412493cd04b4a50aa960416c6f9ce","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-06-26T14:51:16Z","title_canon_sha256":"03341ba9a35e8969cfb512f360ecd35e8f8856350d9608c2d681521706c97720"},"schema_version":"1.0","source":{"id":"2606.28153","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28153","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28153v1","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28153","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_12","alias_value":"JZFZLXG2TXCS","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_16","alias_value":"JZFZLXG2TXCSLITV","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_8","alias_value":"JZFZLXG2","created_at":"2026-06-29T01:15:07Z"}],"graph_snapshots":[{"event_id":"sha256:a0d701b2d0f53cbb89046dd61c3526b4822ddb2bdeeaa0d98b91fe73bf7b6e4f","target":"graph","created_at":"2026-06-29T01:15:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.28153/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Jailbreak attacks bypass LLM safety alignment, yet their mechanisms remain poorly understood. We provide evidence that attacks do not comprehensively eliminate safety features, but instead selectively suppress specific attention heads. We identify two functionally differentiated types: Adversarially Compromised Heads (ACHs) concentrated in early layers, which are suppressed under attacks, and Safety-Aligned Heads (SAHs) in mid-layers, which maintain robust activations even when attacks succeed. Ablation studies support the causal role of ACHs and the contribution of SAHs to robust activations:","authors_text":"Dongqi Han, Linghui Li, Yanchen Yin","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-06-26T14:51:16Z","title":"Robust Harmful Features Under Jailbreak Attacks: Mechanistic Evidence from Attention Head Specialization in Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28153","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f2184af0f37d642f8b991e89ad702194c02c75e2d086e371bfab00c9cb2a5f9e","target":"record","created_at":"2026-06-29T01:15:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"93bcf11c24b0507a9b7051e96f052555f8b412493cd04b4a50aa960416c6f9ce","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-06-26T14:51:16Z","title_canon_sha256":"03341ba9a35e8969cfb512f360ecd35e8f8856350d9608c2d681521706c97720"},"schema_version":"1.0","source":{"id":"2606.28153","kind":"arxiv","version":1}},"canonical_sha256":"4e4b95dcda9dc525a275bf9c366fe99a9061477c38868c0d0484de73a76f6c5d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4e4b95dcda9dc525a275bf9c366fe99a9061477c38868c0d0484de73a76f6c5d","first_computed_at":"2026-06-29T01:15:07.586783Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-29T01:15:07.586783Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HSwS3XS8ujNI15LS7FpsZnX4w5Y2gL7YrHX6AlWKdIfN7G+VB+H+YDm5Mud3SZ7J3D0IcNtaC/zaIClgo5suBA==","signature_status":"signed_v1","signed_at":"2026-06-29T01:15:07.587191Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.28153","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f2184af0f37d642f8b991e89ad702194c02c75e2d086e371bfab00c9cb2a5f9e","sha256:a0d701b2d0f53cbb89046dd61c3526b4822ddb2bdeeaa0d98b91fe73bf7b6e4f"],"state_sha256":"1d403fee8fc05cb1c06ba693a692b05704c98ec53d4818b30f1158cbf9773bfb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cOKBPbqk5tZpZlCdb3zVcbMK4zw31Ta/5FO8VLzePSdsHLHcaZ0Fr4uq60Pli87lCo2MeUR0Gnq1NTnAg3ViAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T21:23:53.117010Z","bundle_sha256":"2e117e65fdb7a7d866cb1ff72e3d3306c43119151588a778509d3376a69a7caf"}}