{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:XIVCPCBYZWSKFFGAUUSABRWCJX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b8511cb7645f2764d20882259a87ce523ee2eb4a27f74b27b59a9c5cf31b3f29","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-10-05T13:30:33Z","title_canon_sha256":"326321b3dffb4611ffc52f241a9ab72d036364c1178c5d101fd0dc2c2b7cd322"},"schema_version":"1.0","source":{"id":"2410.04155","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2410.04155","created_at":"2026-05-21T01:04:12Z"},{"alias_kind":"arxiv_version","alias_value":"2410.04155v2","created_at":"2026-05-21T01:04:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.04155","created_at":"2026-05-21T01:04:12Z"},{"alias_kind":"pith_short_12","alias_value":"XIVCPCBYZWSK","created_at":"2026-05-21T01:04:12Z"},{"alias_kind":"pith_short_16","alias_value":"XIVCPCBYZWSKFFGA","created_at":"2026-05-21T01:04:12Z"},{"alias_kind":"pith_short_8","alias_value":"XIVCPCBY","created_at":"2026-05-21T01:04:12Z"}],"graph_snapshots":[{"event_id":"sha256:ca137fe1ef517ddc87630a0ed688c5720d6b0e0c6297e21764853d84e3e686ec","target":"graph","created_at":"2026-05-21T01:04:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2410.04155/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"How to defend large language models (LLMs) from generating toxic content is an important research area. Yet, most research focused on various model training techniques to remediate LLMs by updating their weights. A typical related research area is safety alignment. This however is often costly and tedious and can expose the model to even more problems such as catastrophic forgetting if the trainings are not carefully handled by experienced NLP practitioners. We thus propose a simple yet effective and novel algorithm, namely \\textbf{Tox}ic Subword \\textbf{Prun}ing (ToxPrune) to prune the subwor","authors_text":"Hongyuan Lu, Wai Lam","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-10-05T13:30:33Z","title":"Toxic Subword Pruning for Dialogue Response Generation on Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.04155","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3c8ada19d83e538e9cc41fef79b16fd0b7cf9bc3d88685a6cf7070bf7069a818","target":"record","created_at":"2026-05-21T01:04:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b8511cb7645f2764d20882259a87ce523ee2eb4a27f74b27b59a9c5cf31b3f29","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-10-05T13:30:33Z","title_canon_sha256":"326321b3dffb4611ffc52f241a9ab72d036364c1178c5d101fd0dc2c2b7cd322"},"schema_version":"1.0","source":{"id":"2410.04155","kind":"arxiv","version":2}},"canonical_sha256":"ba2a278838cda4a294c0a52400c6c24dc41606206af88ab0bb5d900ae955ab7e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ba2a278838cda4a294c0a52400c6c24dc41606206af88ab0bb5d900ae955ab7e","first_computed_at":"2026-05-21T01:04:12.056546Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:12.056546Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"vPAd9/W93KdMJtjTbw8pyFVNSN1xnoE3kauNzREbq+0N1Z4O9GirzXdQslpOsqPIcleJbcTwo1QTRXu23a0WDg==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:12.057310Z","signed_message":"canonical_sha256_bytes"},"source_id":"2410.04155","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3c8ada19d83e538e9cc41fef79b16fd0b7cf9bc3d88685a6cf7070bf7069a818","sha256:ca137fe1ef517ddc87630a0ed688c5720d6b0e0c6297e21764853d84e3e686ec"],"state_sha256":"174e84b5fe2d1c09b2300848dbe018b501f25eb1561bb87ba099136054543bb2"}