{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:VJLPHIXBNZSE3Y2Y77TTAUK5MZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b12e5633517ddf54a6d746a4cd1cba72c945e2892f5c57b8dd51e7e288c331ec","cross_cats_sorted":["cs.AI","q-bio.QM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-05T15:18:50Z","title_canon_sha256":"11bdb3870692d685680408b0abb8ef1efac6a665394a1858afd384c8f2b2e68e"},"schema_version":"1.0","source":{"id":"2512.05794","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.05794","created_at":"2026-05-27T02:05:08Z"},{"alias_kind":"arxiv_version","alias_value":"2512.05794v3","created_at":"2026-05-27T02:05:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.05794","created_at":"2026-05-27T02:05:08Z"},{"alias_kind":"pith_short_12","alias_value":"VJLPHIXBNZSE","created_at":"2026-05-27T02:05:08Z"},{"alias_kind":"pith_short_16","alias_value":"VJLPHIXBNZSE3Y2Y","created_at":"2026-05-27T02:05:08Z"},{"alias_kind":"pith_short_8","alias_value":"VJLPHIXB","created_at":"2026-05-27T02:05:08Z"}],"graph_snapshots":[{"event_id":"sha256:75202678def74f7be5eaebe6b885a1c3b8d84a9cac4c6570c38807b06d4757e3","target":"graph","created_at":"2026-05-27T02:05:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Ordered SAEs impose a hierarchical structure that reliably identifies steerable features, but at the expense of more complex and less interpretable activation patterns."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That observed correlations between SAE features and biological concepts reflect causal mechanisms in the model rather than spurious statistical associations, and that steering success generalizes beyond the tested antibody sequences."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"TopK SAEs uncover biologically meaningful latent features in antibody language models without guaranteeing causal steering, whereas Ordered SAEs provide reliable generative control at the cost of complex activation patterns."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Ordered SAEs reliably identify steerable features in antibody language models at the cost of more complex activation patterns."}],"snapshot_sha256":"2e61fcd25ffd0f8e39627fa4e30932657f8094e0f422bd2f9e2b4bbbf81de780"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"2d01b489f5d9a8612afe025fe5e415b3f1c79bd61f603c78726ccd9e8833bb13"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.05794/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Sparse autoencoders (SAEs) are a mechanistic interpretability technique that have been used to provide insight into learned concepts within large protein language models. Here, we employ TopK and Ordered SAEs to investigate autoregressive antibody language models, and steer their generation. We show that TopK SAEs can reveal biologically meaningful latent features, but high feature-concept correlation does not guarantee causal control over generation. In contrast, Ordered SAEs impose a hierarchical structure that reliably identifies steerable features, but at the expense of more complex and le","authors_text":"Anisha Parsan, Anna L. Beukenhorst, Charlotte M. Deane, John J. Yang, Nithin Parsan, Oliver M. Turnbull, Rebonto Haque","cross_cats":["cs.AI","q-bio.QM"],"headline":"Ordered SAEs reliably identify steerable features in antibody language models at the cost of more complex activation patterns.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-05T15:18:50Z","title":"Mechanistic Interpretability of Antibody Language Models Using SAEs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.05794","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-17T00:23:30.871293Z","id":"7782b804-28c5-45b6-b170-fb3e7a799a69","model_set":{"reader":"grok-4.3"},"one_line_summary":"TopK SAEs uncover biologically meaningful latent features in antibody language models without guaranteeing causal steering, whereas Ordered SAEs provide reliable generative control at the cost of complex activation patterns.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Ordered SAEs reliably identify steerable features in antibody language models at the cost of more complex activation patterns.","strongest_claim":"Ordered SAEs impose a hierarchical structure that reliably identifies steerable features, but at the expense of more complex and less interpretable activation patterns.","weakest_assumption":"That observed correlations between SAE features and biological concepts reflect causal mechanisms in the model rather than spurious statistical associations, and that steering success generalizes beyond the tested antibody sequences."}},"verdict_id":"7782b804-28c5-45b6-b170-fb3e7a799a69"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9796113b848833e8891938cdc316631efeb236c940a3c87a73ae656d0d0ed280","target":"record","created_at":"2026-05-27T02:05:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b12e5633517ddf54a6d746a4cd1cba72c945e2892f5c57b8dd51e7e288c331ec","cross_cats_sorted":["cs.AI","q-bio.QM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-05T15:18:50Z","title_canon_sha256":"11bdb3870692d685680408b0abb8ef1efac6a665394a1858afd384c8f2b2e68e"},"schema_version":"1.0","source":{"id":"2512.05794","kind":"arxiv","version":3}},"canonical_sha256":"aa56f3a2e16e644de358ffe730515d6643f4aea5446af8b93bdf48b71d1f79e0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"aa56f3a2e16e644de358ffe730515d6643f4aea5446af8b93bdf48b71d1f79e0","first_computed_at":"2026-05-27T02:05:08.968874Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T02:05:08.968874Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yP4gR3oMGSI1Uw1KnL64xYnXWq4O+9g1Oz6+cQSBgItYi42qHcJwgXPgC2UIFvv2wbd+pVRtGLLM+HpUT0TEBA==","signature_status":"signed_v1","signed_at":"2026-05-27T02:05:08.969653Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.05794","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9796113b848833e8891938cdc316631efeb236c940a3c87a73ae656d0d0ed280","sha256:75202678def74f7be5eaebe6b885a1c3b8d84a9cac4c6570c38807b06d4757e3"],"state_sha256":"c0425f19c67f8632707e5c833d85759913981eea905854215bcb50b8b2397337"}