{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:KF37V5QUANOISNQBGSURYCLID6","short_pith_number":"pith:KF37V5QU","canonical_record":{"source":{"id":"2506.01247","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-06-02T01:51:20Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"6569d1084f929db00478bca025773fc5878573e280f030c727caa9835d7d046f","abstract_canon_sha256":"6f5c4b69b71d0f8723bd4a66d3d3ecdce6b9a42bb9bf40880c2660c610d64bb2"},"schema_version":"1.0"},"canonical_sha256":"5177faf614035c89360134a91c09681fa4f608e36a989d446fcb216cb2f8fd7d","source":{"kind":"arxiv","id":"2506.01247","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.01247","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"arxiv_version","alias_value":"2506.01247v3","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.01247","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"pith_short_12","alias_value":"KF37V5QUANOI","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"pith_short_16","alias_value":"KF37V5QUANOISNQB","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"pith_short_8","alias_value":"KF37V5QU","created_at":"2026-05-28T01:04:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:KF37V5QUANOISNQBGSURYCLID6","target":"record","payload":{"canonical_record":{"source":{"id":"2506.01247","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-06-02T01:51:20Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"6569d1084f929db00478bca025773fc5878573e280f030c727caa9835d7d046f","abstract_canon_sha256":"6f5c4b69b71d0f8723bd4a66d3d3ecdce6b9a42bb9bf40880c2660c610d64bb2"},"schema_version":"1.0"},"canonical_sha256":"5177faf614035c89360134a91c09681fa4f608e36a989d446fcb216cb2f8fd7d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:28.424871Z","signature_b64":"TCQC3xtX+EItL1tA/psjAPgCmLZvzuZ5Vo+/mQ71C8gxbfJleLf5vj1/6pJp5eOgAkrqD+FNNYA/yQ0Nh1crBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5177faf614035c89360134a91c09681fa4f608e36a989d446fcb216cb2f8fd7d","last_reissued_at":"2026-05-28T01:04:28.424360Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:28.424360Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2506.01247","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:28Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uylwF1xm3NQvonGJ3o0vf2RLOo2vKxo5251gvqLVqOt96Do0UnykOh91SYc0UoVVxXsVYQNyx2wCI1zlfsvIAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T04:18:09.819370Z"},"content_sha256":"681f63690fc98ff7eedb886f2d5c56588025987e20fcdaec87f85482e199d51a","schema_version":"1.0","event_id":"sha256:681f63690fc98ff7eedb886f2d5c56588025987e20fcdaec87f85482e199d51a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:KF37V5QUANOISNQBGSURYCLID6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond Interpretability: When, Why, and How Sparse Autoencoders Enable Label-Free Visual Steering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Visual Sparse Steering extracts a steering vector from SAE features on unlabeled data to adapt CLIP models at test time and raise zero-shot accuracy by 1-4%.","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CV","authors_text":"Dimitris N. Metaxas, Gemma E. Moran, Gerasimos Chatzoudis, Hao Wang, Zhuowei Li","submitted_at":"2025-06-02T01:51:20Z","abstract_excerpt":"Sparse Autoencoders (SAEs) are increasingly used to interpret foundation models, but their role as an actionable intervention space remains less understood, especially in vision. We study whether sparse visual features can be used not only for post-hoc analysis, but also to steer frozen vision-language models. We introduce Visual Sparse Steering (VS2), a label-free method that trains a top-$k$ SAE on unlabeled activations from a frozen CLIP image encoder and, at test time, constructs an interpretable steering vector by amplifying the input's active sparse features and decoding the induced chan"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Across CIFAR-100, CUB-200, and Tiny-ImageNet and two CLIP backbones, VS2 improves zero-shot top-1 accuracy by 3.45-4.12%, 0.93-1.08%, and 1.50-1.84% respectively, while remaining forward-only and adding minimal compute overhead.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The assumption that sparse features extracted by an SAE trained on unlabeled in-domain activations contain task-relevant information that can be reliably turned into an effective steering vector without any labeled data or optimization at test time.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"VS2 constructs steering vectors from sparse SAE features on unlabeled in-domain activations to improve zero-shot accuracy of CLIP models by 0.93-4.12% on CIFAR-100, CUB-200, and Tiny-ImageNet while remaining forward-pass only.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Visual Sparse Steering extracts a steering vector from SAE features on unlabeled data to adapt CLIP models at test time and raise zero-shot accuracy by 1-4%.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"2ecdf7b5492c3482a1b7a7c8a1a375ee5043f7f7a95c5e62fd6cce1a54b572b2"},"source":{"id":"2506.01247","kind":"arxiv","version":3},"verdict":{"id":"09872826-c8d1-4c8d-a805-f13d1b06c8d0","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T11:30:34.663503Z","strongest_claim":"Across CIFAR-100, CUB-200, and Tiny-ImageNet and two CLIP backbones, VS2 improves zero-shot top-1 accuracy by 3.45-4.12%, 0.93-1.08%, and 1.50-1.84% respectively, while remaining forward-only and adding minimal compute overhead.","one_line_summary":"VS2 constructs steering vectors from sparse SAE features on unlabeled in-domain activations to improve zero-shot accuracy of CLIP models by 0.93-4.12% on CIFAR-100, CUB-200, and Tiny-ImageNet while remaining forward-pass only.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The assumption that sparse features extracted by an SAE trained on unlabeled in-domain activations contain task-relevant information that can be reliably turned into an effective steering vector without any labeled data or optimization at test time.","pith_extraction_headline":"Visual Sparse Steering extracts a steering vector from SAE features on unlabeled data to adapt CLIP models at test time and raise zero-shot accuracy by 1-4%."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2506.01247/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"fd26c79d5ea213985780555d01c989f50b7d09bea09f6dc27dc35167fa4b8ee9"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"09872826-c8d1-4c8d-a805-f13d1b06c8d0"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:28Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"luHsS73BxUEnGK1zSj/B72nothRaogu8tUnhi7L6JOaliBfmVFv7sv+ZJoCs4LRkwRml3nDZAeXc1RrJtA4dCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T04:18:09.819810Z"},"content_sha256":"4b296339b8155bed2c77435db1ff14bdd8f85473d8193782ab3818de36867f86","schema_version":"1.0","event_id":"sha256:4b296339b8155bed2c77435db1ff14bdd8f85473d8193782ab3818de36867f86"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KF37V5QUANOISNQBGSURYCLID6/bundle.json","state_url":"https://pith.science/pith/KF37V5QUANOISNQBGSURYCLID6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KF37V5QUANOISNQBGSURYCLID6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T04:18:09Z","links":{"resolver":"https://pith.science/pith/KF37V5QUANOISNQBGSURYCLID6","bundle":"https://pith.science/pith/KF37V5QUANOISNQBGSURYCLID6/bundle.json","state":"https://pith.science/pith/KF37V5QUANOISNQBGSURYCLID6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KF37V5QUANOISNQBGSURYCLID6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:KF37V5QUANOISNQBGSURYCLID6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6f5c4b69b71d0f8723bd4a66d3d3ecdce6b9a42bb9bf40880c2660c610d64bb2","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-06-02T01:51:20Z","title_canon_sha256":"6569d1084f929db00478bca025773fc5878573e280f030c727caa9835d7d046f"},"schema_version":"1.0","source":{"id":"2506.01247","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.01247","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"arxiv_version","alias_value":"2506.01247v3","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.01247","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"pith_short_12","alias_value":"KF37V5QUANOI","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"pith_short_16","alias_value":"KF37V5QUANOISNQB","created_at":"2026-05-28T01:04:28Z"},{"alias_kind":"pith_short_8","alias_value":"KF37V5QU","created_at":"2026-05-28T01:04:28Z"}],"graph_snapshots":[{"event_id":"sha256:4b296339b8155bed2c77435db1ff14bdd8f85473d8193782ab3818de36867f86","target":"graph","created_at":"2026-05-28T01:04:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Across CIFAR-100, CUB-200, and Tiny-ImageNet and two CLIP backbones, VS2 improves zero-shot top-1 accuracy by 3.45-4.12%, 0.93-1.08%, and 1.50-1.84% respectively, while remaining forward-only and adding minimal compute overhead."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The assumption that sparse features extracted by an SAE trained on unlabeled in-domain activations contain task-relevant information that can be reliably turned into an effective steering vector without any labeled data or optimization at test time."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"VS2 constructs steering vectors from sparse SAE features on unlabeled in-domain activations to improve zero-shot accuracy of CLIP models by 0.93-4.12% on CIFAR-100, CUB-200, and Tiny-ImageNet while remaining forward-pass only."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Visual Sparse Steering extracts a steering vector from SAE features on unlabeled data to adapt CLIP models at test time and raise zero-shot accuracy by 1-4%."}],"snapshot_sha256":"2ecdf7b5492c3482a1b7a7c8a1a375ee5043f7f7a95c5e62fd6cce1a54b572b2"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"fd26c79d5ea213985780555d01c989f50b7d09bea09f6dc27dc35167fa4b8ee9"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2506.01247/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Sparse Autoencoders (SAEs) are increasingly used to interpret foundation models, but their role as an actionable intervention space remains less understood, especially in vision. We study whether sparse visual features can be used not only for post-hoc analysis, but also to steer frozen vision-language models. We introduce Visual Sparse Steering (VS2), a label-free method that trains a top-$k$ SAE on unlabeled activations from a frozen CLIP image encoder and, at test time, constructs an interpretable steering vector by amplifying the input's active sparse features and decoding the induced chan","authors_text":"Dimitris N. Metaxas, Gemma E. Moran, Gerasimos Chatzoudis, Hao Wang, Zhuowei Li","cross_cats":["cs.AI","cs.LG"],"headline":"Visual Sparse Steering extracts a steering vector from SAE features on unlabeled data to adapt CLIP models at test time and raise zero-shot accuracy by 1-4%.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-06-02T01:51:20Z","title":"Beyond Interpretability: When, Why, and How Sparse Autoencoders Enable Label-Free Visual Steering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.01247","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-19T11:30:34.663503Z","id":"09872826-c8d1-4c8d-a805-f13d1b06c8d0","model_set":{"reader":"grok-4.3"},"one_line_summary":"VS2 constructs steering vectors from sparse SAE features on unlabeled in-domain activations to improve zero-shot accuracy of CLIP models by 0.93-4.12% on CIFAR-100, CUB-200, and Tiny-ImageNet while remaining forward-pass only.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Visual Sparse Steering extracts a steering vector from SAE features on unlabeled data to adapt CLIP models at test time and raise zero-shot accuracy by 1-4%.","strongest_claim":"Across CIFAR-100, CUB-200, and Tiny-ImageNet and two CLIP backbones, VS2 improves zero-shot top-1 accuracy by 3.45-4.12%, 0.93-1.08%, and 1.50-1.84% respectively, while remaining forward-only and adding minimal compute overhead.","weakest_assumption":"The assumption that sparse features extracted by an SAE trained on unlabeled in-domain activations contain task-relevant information that can be reliably turned into an effective steering vector without any labeled data or optimization at test time."}},"verdict_id":"09872826-c8d1-4c8d-a805-f13d1b06c8d0"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:681f63690fc98ff7eedb886f2d5c56588025987e20fcdaec87f85482e199d51a","target":"record","created_at":"2026-05-28T01:04:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6f5c4b69b71d0f8723bd4a66d3d3ecdce6b9a42bb9bf40880c2660c610d64bb2","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-06-02T01:51:20Z","title_canon_sha256":"6569d1084f929db00478bca025773fc5878573e280f030c727caa9835d7d046f"},"schema_version":"1.0","source":{"id":"2506.01247","kind":"arxiv","version":3}},"canonical_sha256":"5177faf614035c89360134a91c09681fa4f608e36a989d446fcb216cb2f8fd7d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5177faf614035c89360134a91c09681fa4f608e36a989d446fcb216cb2f8fd7d","first_computed_at":"2026-05-28T01:04:28.424360Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:28.424360Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"TCQC3xtX+EItL1tA/psjAPgCmLZvzuZ5Vo+/mQ71C8gxbfJleLf5vj1/6pJp5eOgAkrqD+FNNYA/yQ0Nh1crBA==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:28.424871Z","signed_message":"canonical_sha256_bytes"},"source_id":"2506.01247","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:681f63690fc98ff7eedb886f2d5c56588025987e20fcdaec87f85482e199d51a","sha256:4b296339b8155bed2c77435db1ff14bdd8f85473d8193782ab3818de36867f86"],"state_sha256":"9b5477db27d98117488a3f3e01db9d8fbe15d2740438d75aaf1efd069af8ab27"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tmg5UT2SX1o1jd+WjURYZ8swa2FeriI3/16asthJXt3C8+FkVKxTDVdE7fi0vANoWCYM27GfFiUahVGhWyvuCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T04:18:09.821911Z","bundle_sha256":"40518ebc91dac8aa29856cbf44e4994bcf40d2d78c6c3d81a15d0aa2ebcb087d"}}