{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:NH4KH3L5C6RXWKB7YSX6D77DOA","short_pith_number":"pith:NH4KH3L5","canonical_record":{"source":{"id":"2507.05660","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2025-07-08T04:40:09Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"ad931765579c5c8ecbd3d932863e35f9adc695a2c5a7eadadb18ee4ca2405823","abstract_canon_sha256":"e564a931680a0a5e23cf9a7b82dba2c5f54bf551ee04ace6d415335a29f32f69"},"schema_version":"1.0"},"canonical_sha256":"69f8a3ed7d17a37b283fc4afe1ffe3701c4563c38856f2dcb584129042a48735","source":{"kind":"arxiv","id":"2507.05660","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.05660","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"arxiv_version","alias_value":"2507.05660v3","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.05660","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"pith_short_12","alias_value":"NH4KH3L5C6RX","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"pith_short_16","alias_value":"NH4KH3L5C6RXWKB7","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"pith_short_8","alias_value":"NH4KH3L5","created_at":"2026-05-22T02:04:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:NH4KH3L5C6RXWKB7YSX6D77DOA","target":"record","payload":{"canonical_record":{"source":{"id":"2507.05660","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2025-07-08T04:40:09Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"ad931765579c5c8ecbd3d932863e35f9adc695a2c5a7eadadb18ee4ca2405823","abstract_canon_sha256":"e564a931680a0a5e23cf9a7b82dba2c5f54bf551ee04ace6d415335a29f32f69"},"schema_version":"1.0"},"canonical_sha256":"69f8a3ed7d17a37b283fc4afe1ffe3701c4563c38856f2dcb584129042a48735","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T02:04:36.343003Z","signature_b64":"ki5JTjSPD2uoqTaSpUDsnl90tIj6VnMT0rpjfqKx71hiV6jj3C+yMCp68X4aOunsgO/0iB7ajixUB/IstnpLCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"69f8a3ed7d17a37b283fc4afe1ffe3701c4563c38856f2dcb584129042a48735","last_reissued_at":"2026-05-22T02:04:36.341997Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T02:04:36.341997Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2507.05660","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T02:04:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vvmwX4MeOOiE6MRLQ+lYtTlClpsMeBNHm1pGsjXjZIuf8GkV9Mi3fiT5aAfaOuDCtbJQPiLrj7D9X2vGn9qmAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T22:27:46.060458Z"},"content_sha256":"47d216d08c788a7d74ee3950702d8360db8a9e41ccca3d7c0333d69663ca1416","schema_version":"1.0","event_id":"sha256:47d216d08c788a7d74ee3950702d8360db8a9e41ccca3d7c0333d69663ca1416"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:NH4KH3L5C6RXWKB7YSX6D77DOA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Optimus: A Robust Defense Framework for Mitigating Toxicity while Fine-Tuning Conversational AI","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CR","authors_text":"Aravind Cheruvu, Bimal Viswanath, Daphne Yao, Murtuza Jadliwala, Nicholas Ka-Shing Kong, Shravya Kanchi, Sifat Muhammad Abdullah","submitted_at":"2025-07-08T04:40:09Z","abstract_excerpt":"Customizing Large Language Models (LLMs) on untrusted datasets poses severe risks of injecting toxic behaviors. In this work, we introduce Optimus, a novel defense framework designed to mitigate fine-tuning harms while preserving conversational utility. Unlike existing defenses that rely heavily on precise toxicity detection or restrictive filtering, Optimus addresses the critical challenge of ensuring robust mitigation even when toxicity classifiers are imperfect or biased. Optimus integrates a training-free toxicity classification scheme that repurposes the safety alignment of commodity LLMs"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.05660","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.05660/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T02:04:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HZAGxBVZ/M/9n+sGoFSbR+XaJ/t2vULxH2QbjhqWsjx53tBRkedN0UffyyP69W4TqFc8QmlbUmMWX0A0yc1GDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T22:27:46.061216Z"},"content_sha256":"8eaa561af55b3b6703b89b59bfedb336195f8225515679db62352c3f9e4f51fd","schema_version":"1.0","event_id":"sha256:8eaa561af55b3b6703b89b59bfedb336195f8225515679db62352c3f9e4f51fd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NH4KH3L5C6RXWKB7YSX6D77DOA/bundle.json","state_url":"https://pith.science/pith/NH4KH3L5C6RXWKB7YSX6D77DOA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NH4KH3L5C6RXWKB7YSX6D77DOA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T22:27:46Z","links":{"resolver":"https://pith.science/pith/NH4KH3L5C6RXWKB7YSX6D77DOA","bundle":"https://pith.science/pith/NH4KH3L5C6RXWKB7YSX6D77DOA/bundle.json","state":"https://pith.science/pith/NH4KH3L5C6RXWKB7YSX6D77DOA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NH4KH3L5C6RXWKB7YSX6D77DOA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:NH4KH3L5C6RXWKB7YSX6D77DOA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e564a931680a0a5e23cf9a7b82dba2c5f54bf551ee04ace6d415335a29f32f69","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2025-07-08T04:40:09Z","title_canon_sha256":"ad931765579c5c8ecbd3d932863e35f9adc695a2c5a7eadadb18ee4ca2405823"},"schema_version":"1.0","source":{"id":"2507.05660","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.05660","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"arxiv_version","alias_value":"2507.05660v3","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.05660","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"pith_short_12","alias_value":"NH4KH3L5C6RX","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"pith_short_16","alias_value":"NH4KH3L5C6RXWKB7","created_at":"2026-05-22T02:04:36Z"},{"alias_kind":"pith_short_8","alias_value":"NH4KH3L5","created_at":"2026-05-22T02:04:36Z"}],"graph_snapshots":[{"event_id":"sha256:8eaa561af55b3b6703b89b59bfedb336195f8225515679db62352c3f9e4f51fd","target":"graph","created_at":"2026-05-22T02:04:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2507.05660/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Customizing Large Language Models (LLMs) on untrusted datasets poses severe risks of injecting toxic behaviors. In this work, we introduce Optimus, a novel defense framework designed to mitigate fine-tuning harms while preserving conversational utility. Unlike existing defenses that rely heavily on precise toxicity detection or restrictive filtering, Optimus addresses the critical challenge of ensuring robust mitigation even when toxicity classifiers are imperfect or biased. Optimus integrates a training-free toxicity classification scheme that repurposes the safety alignment of commodity LLMs","authors_text":"Aravind Cheruvu, Bimal Viswanath, Daphne Yao, Murtuza Jadliwala, Nicholas Ka-Shing Kong, Shravya Kanchi, Sifat Muhammad Abdullah","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2025-07-08T04:40:09Z","title":"Optimus: A Robust Defense Framework for Mitigating Toxicity while Fine-Tuning Conversational AI"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.05660","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:47d216d08c788a7d74ee3950702d8360db8a9e41ccca3d7c0333d69663ca1416","target":"record","created_at":"2026-05-22T02:04:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e564a931680a0a5e23cf9a7b82dba2c5f54bf551ee04ace6d415335a29f32f69","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2025-07-08T04:40:09Z","title_canon_sha256":"ad931765579c5c8ecbd3d932863e35f9adc695a2c5a7eadadb18ee4ca2405823"},"schema_version":"1.0","source":{"id":"2507.05660","kind":"arxiv","version":3}},"canonical_sha256":"69f8a3ed7d17a37b283fc4afe1ffe3701c4563c38856f2dcb584129042a48735","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"69f8a3ed7d17a37b283fc4afe1ffe3701c4563c38856f2dcb584129042a48735","first_computed_at":"2026-05-22T02:04:36.341997Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T02:04:36.341997Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ki5JTjSPD2uoqTaSpUDsnl90tIj6VnMT0rpjfqKx71hiV6jj3C+yMCp68X4aOunsgO/0iB7ajixUB/IstnpLCQ==","signature_status":"signed_v1","signed_at":"2026-05-22T02:04:36.343003Z","signed_message":"canonical_sha256_bytes"},"source_id":"2507.05660","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:47d216d08c788a7d74ee3950702d8360db8a9e41ccca3d7c0333d69663ca1416","sha256:8eaa561af55b3b6703b89b59bfedb336195f8225515679db62352c3f9e4f51fd"],"state_sha256":"5989f0e342230880a9002785584048af2bb8acd4c92714c930dee6162fbad402"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2a1Qf4mhJxqzAeUyc5OKJBNUbatRXpZoRZSxiBSKPXv3k34ZUF1Zx4HDctJ13gkttaTKfz/xSmS12+FMIzSnCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T22:27:46.065553Z","bundle_sha256":"a37a4461fcd063a1c74dde4978aab7228f026d61969829b8351198e05e656d2e"}}