{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:NEVAX5SDZXXLEG7I4U25MUOMTM","short_pith_number":"pith:NEVAX5SD","canonical_record":{"source":{"id":"2606.21197","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-19T08:08:43Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"d5ba30a2d712f54a17f5306dc9bb420524bde0088a8c795c1a6ff3101876c178","abstract_canon_sha256":"4b4e7ca17a0313d3fb9b68d0deb7d981d3026f5a98be20d35970e702bc528404"},"schema_version":"1.0"},"canonical_sha256":"692a0bf643cdeeb21be8e535d651cc9b0eecb559fef478e74b5f9fc3bdbc95d9","source":{"kind":"arxiv","id":"2606.21197","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21197","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21197v1","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21197","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"pith_short_12","alias_value":"NEVAX5SDZXXL","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"pith_short_16","alias_value":"NEVAX5SDZXXLEG7I","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"pith_short_8","alias_value":"NEVAX5SD","created_at":"2026-06-23T01:12:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:NEVAX5SDZXXLEG7I4U25MUOMTM","target":"record","payload":{"canonical_record":{"source":{"id":"2606.21197","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-19T08:08:43Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"d5ba30a2d712f54a17f5306dc9bb420524bde0088a8c795c1a6ff3101876c178","abstract_canon_sha256":"4b4e7ca17a0313d3fb9b68d0deb7d981d3026f5a98be20d35970e702bc528404"},"schema_version":"1.0"},"canonical_sha256":"692a0bf643cdeeb21be8e535d651cc9b0eecb559fef478e74b5f9fc3bdbc95d9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:12:33.122216Z","signature_b64":"iVnkBVjuTTup6PiyxG1esKBQ5GCLeqdtrh7PAAfaU1O6YyR2kakvoChER1J9fLDI+sRjR16Mg3wDw/D+UbxOBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"692a0bf643cdeeb21be8e535d651cc9b0eecb559fef478e74b5f9fc3bdbc95d9","last_reissued_at":"2026-06-23T01:12:33.121738Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:12:33.121738Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.21197","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:12:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Y0G31nrODb/OZkxkfzZgufmPIwCL0iV06z5W9CzM4YpJI/t056i52NQoBJnzHwjkVp4v8fUMV1zvysf5ijGFAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T08:47:08.090658Z"},"content_sha256":"81024d87944d64cb234185f1b684cd28b62f6741d44e11da14885355f3d9fad3","schema_version":"1.0","event_id":"sha256:81024d87944d64cb234185f1b684cd28b62f6741d44e11da14885355f3d9fad3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:NEVAX5SDZXXLEG7I4U25MUOMTM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Extraction and Analysis of Multimodal Concepts in Vision Language Models through Sparse Autoencoders","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CV","authors_text":"Jae Hee Lee, Sergio Lanza, Stefan Wermter","submitted_at":"2026-06-19T08:08:43Z","abstract_excerpt":"Vision Language Models (VLMs) have demonstrated impressive performance in tasks requiring joint understanding of images and text, such as image captioning and Visual Question Answering (VQA), but our understanding of their internal processes remains limited. Recently, Sparse Autoencoders (SAEs) have emerged as a promising tool to support the interpretation of concepts encoded in VLMs. However, most SAE-based approaches focus only on textual or visual concepts separately, ignoring multimodal concepts.\n  This limitation hinders a comprehensive understanding of VLMs, since concepts that integrate"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21197","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21197/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:12:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6ru9liysn8ReRbe2M3n4iz8VDPOn/KLHbFUg8CthX0gTP9sKl3ROODA7QKdghiiF/dLORECjoMpyww3R77ApAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T08:47:08.091035Z"},"content_sha256":"6774fe73c94fd39105b3784a6536900f6f91a8d41f0ec470ec8de58630ac1aa6","schema_version":"1.0","event_id":"sha256:6774fe73c94fd39105b3784a6536900f6f91a8d41f0ec470ec8de58630ac1aa6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NEVAX5SDZXXLEG7I4U25MUOMTM/bundle.json","state_url":"https://pith.science/pith/NEVAX5SDZXXLEG7I4U25MUOMTM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NEVAX5SDZXXLEG7I4U25MUOMTM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T08:47:08Z","links":{"resolver":"https://pith.science/pith/NEVAX5SDZXXLEG7I4U25MUOMTM","bundle":"https://pith.science/pith/NEVAX5SDZXXLEG7I4U25MUOMTM/bundle.json","state":"https://pith.science/pith/NEVAX5SDZXXLEG7I4U25MUOMTM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NEVAX5SDZXXLEG7I4U25MUOMTM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NEVAX5SDZXXLEG7I4U25MUOMTM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4b4e7ca17a0313d3fb9b68d0deb7d981d3026f5a98be20d35970e702bc528404","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-19T08:08:43Z","title_canon_sha256":"d5ba30a2d712f54a17f5306dc9bb420524bde0088a8c795c1a6ff3101876c178"},"schema_version":"1.0","source":{"id":"2606.21197","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21197","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21197v1","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21197","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"pith_short_12","alias_value":"NEVAX5SDZXXL","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"pith_short_16","alias_value":"NEVAX5SDZXXLEG7I","created_at":"2026-06-23T01:12:33Z"},{"alias_kind":"pith_short_8","alias_value":"NEVAX5SD","created_at":"2026-06-23T01:12:33Z"}],"graph_snapshots":[{"event_id":"sha256:6774fe73c94fd39105b3784a6536900f6f91a8d41f0ec470ec8de58630ac1aa6","target":"graph","created_at":"2026-06-23T01:12:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.21197/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Vision Language Models (VLMs) have demonstrated impressive performance in tasks requiring joint understanding of images and text, such as image captioning and Visual Question Answering (VQA), but our understanding of their internal processes remains limited. Recently, Sparse Autoencoders (SAEs) have emerged as a promising tool to support the interpretation of concepts encoded in VLMs. However, most SAE-based approaches focus only on textual or visual concepts separately, ignoring multimodal concepts.\n  This limitation hinders a comprehensive understanding of VLMs, since concepts that integrate","authors_text":"Jae Hee Lee, Sergio Lanza, Stefan Wermter","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-19T08:08:43Z","title":"Extraction and Analysis of Multimodal Concepts in Vision Language Models through Sparse Autoencoders"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21197","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:81024d87944d64cb234185f1b684cd28b62f6741d44e11da14885355f3d9fad3","target":"record","created_at":"2026-06-23T01:12:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4b4e7ca17a0313d3fb9b68d0deb7d981d3026f5a98be20d35970e702bc528404","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-19T08:08:43Z","title_canon_sha256":"d5ba30a2d712f54a17f5306dc9bb420524bde0088a8c795c1a6ff3101876c178"},"schema_version":"1.0","source":{"id":"2606.21197","kind":"arxiv","version":1}},"canonical_sha256":"692a0bf643cdeeb21be8e535d651cc9b0eecb559fef478e74b5f9fc3bdbc95d9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"692a0bf643cdeeb21be8e535d651cc9b0eecb559fef478e74b5f9fc3bdbc95d9","first_computed_at":"2026-06-23T01:12:33.121738Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T01:12:33.121738Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"iVnkBVjuTTup6PiyxG1esKBQ5GCLeqdtrh7PAAfaU1O6YyR2kakvoChER1J9fLDI+sRjR16Mg3wDw/D+UbxOBA==","signature_status":"signed_v1","signed_at":"2026-06-23T01:12:33.122216Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.21197","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:81024d87944d64cb234185f1b684cd28b62f6741d44e11da14885355f3d9fad3","sha256:6774fe73c94fd39105b3784a6536900f6f91a8d41f0ec470ec8de58630ac1aa6"],"state_sha256":"81815fea68971decf8a3d7549d62a1dc273ad624b440587eadb36b2f94271319"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Rjdms8lKkoJyQnaHdtgWuD/5D2w5/MvqXZ21sgEI9WVJMEKJu/AET4epbDUAQY6TuM+Ak5AyrYZp/dpqoLXHBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T08:47:08.093040Z","bundle_sha256":"3f637dbeadc873735954cc6f1a362752370e3ae11aaeb021e5867d9ca45a0194"}}