{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:IA2V3ZBSUPZJVBTXTVJ7MKDOQS","short_pith_number":"pith:IA2V3ZBS","canonical_record":{"source":{"id":"2404.14082","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2024-04-22T11:01:51Z","cross_cats_sorted":[],"title_canon_sha256":"d09de4d41aef7c9cb9e614c76e9bcd3bc085b40b144c35c5feda348312f6f4d9","abstract_canon_sha256":"2ac7b25eafee0ebcde1ec8556a2bace039c5d31e1cd3289e24faedb5d18c2d12"},"schema_version":"1.0"},"canonical_sha256":"40355de432a3f29a86779d53f6286e84a66de360acc42cc846275ad9ae7a958d","source":{"kind":"arxiv","id":"2404.14082","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2404.14082","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"arxiv_version","alias_value":"2404.14082v3","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2404.14082","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"pith_short_12","alias_value":"IA2V3ZBSUPZJ","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"pith_short_16","alias_value":"IA2V3ZBSUPZJVBTX","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"pith_short_8","alias_value":"IA2V3ZBS","created_at":"2026-05-22T14:16:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:IA2V3ZBSUPZJVBTXTVJ7MKDOQS","target":"record","payload":{"canonical_record":{"source":{"id":"2404.14082","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2024-04-22T11:01:51Z","cross_cats_sorted":[],"title_canon_sha256":"d09de4d41aef7c9cb9e614c76e9bcd3bc085b40b144c35c5feda348312f6f4d9","abstract_canon_sha256":"2ac7b25eafee0ebcde1ec8556a2bace039c5d31e1cd3289e24faedb5d18c2d12"},"schema_version":"1.0"},"canonical_sha256":"40355de432a3f29a86779d53f6286e84a66de360acc42cc846275ad9ae7a958d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T14:16:37.904289Z","signature_b64":"cOHZIp2+IWtMtnBbg6PZcUxHFGCKOfa7E1ryefoKNS9oyok/gX5ZhHp58hBYdtBSbCq6ntCPxsYyjViG7EoIDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"40355de432a3f29a86779d53f6286e84a66de360acc42cc846275ad9ae7a958d","last_reissued_at":"2026-05-22T14:16:37.901546Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T14:16:37.901546Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2404.14082","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T14:16:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"l8ikTj8Aol74dhBGkqwJQhFf3RiaiHJAQlUyoor4dJWmBjrHxBtqOiGyslBj61xtnZpqzSeXAxY6qLVf69+pDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:39:14.126122Z"},"content_sha256":"c41c18a1bbdca6f616b28dc40f2172225122c5cf947cd31e20d5c0521899f7cc","schema_version":"1.0","event_id":"sha256:c41c18a1bbdca6f616b28dc40f2172225122c5cf947cd31e20d5c0521899f7cc"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:IA2V3ZBSUPZJVBTXTVJ7MKDOQS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Mechanistic Interpretability for AI Safety -- A Review","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Efstratios Gavves, Leonard Bereska","submitted_at":"2024-04-22T11:01:51Z","abstract_excerpt":"Understanding AI systems' inner workings is critical for ensuring value alignment and safety. This review explores mechanistic interpretability: reverse engineering the computational mechanisms and representations learned by neural networks into human-understandable algorithms and concepts to provide a granular, causal understanding. We establish foundational concepts such as features encoding knowledge within neural activations and hypotheses about their representation and computation. We survey methodologies for causally dissecting model behaviors and assess the relevance of mechanistic inte"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2404.14082","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2404.14082/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T14:16:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kkyX7AwtGru4l5wGDD51tHysnAx4mWvT7Yxn1N4agu3oq9Dpwug9N0Ve8UWVhEIAyglSSt1EjkesubAtBDHSAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:39:14.126910Z"},"content_sha256":"affe5f97c87c4ac2117da94ecc43496e3e9196c81bd9c365ed654ca0a225b221","schema_version":"1.0","event_id":"sha256:affe5f97c87c4ac2117da94ecc43496e3e9196c81bd9c365ed654ca0a225b221"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS/bundle.json","state_url":"https://pith.science/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T15:39:14Z","links":{"resolver":"https://pith.science/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS","bundle":"https://pith.science/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS/bundle.json","state":"https://pith.science/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IA2V3ZBSUPZJVBTXTVJ7MKDOQS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:IA2V3ZBSUPZJVBTXTVJ7MKDOQS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2ac7b25eafee0ebcde1ec8556a2bace039c5d31e1cd3289e24faedb5d18c2d12","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2024-04-22T11:01:51Z","title_canon_sha256":"d09de4d41aef7c9cb9e614c76e9bcd3bc085b40b144c35c5feda348312f6f4d9"},"schema_version":"1.0","source":{"id":"2404.14082","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2404.14082","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"arxiv_version","alias_value":"2404.14082v3","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2404.14082","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"pith_short_12","alias_value":"IA2V3ZBSUPZJ","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"pith_short_16","alias_value":"IA2V3ZBSUPZJVBTX","created_at":"2026-05-22T14:16:37Z"},{"alias_kind":"pith_short_8","alias_value":"IA2V3ZBS","created_at":"2026-05-22T14:16:37Z"}],"graph_snapshots":[{"event_id":"sha256:affe5f97c87c4ac2117da94ecc43496e3e9196c81bd9c365ed654ca0a225b221","target":"graph","created_at":"2026-05-22T14:16:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2404.14082/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Understanding AI systems' inner workings is critical for ensuring value alignment and safety. This review explores mechanistic interpretability: reverse engineering the computational mechanisms and representations learned by neural networks into human-understandable algorithms and concepts to provide a granular, causal understanding. We establish foundational concepts such as features encoding knowledge within neural activations and hypotheses about their representation and computation. We survey methodologies for causally dissecting model behaviors and assess the relevance of mechanistic inte","authors_text":"Efstratios Gavves, Leonard Bereska","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2024-04-22T11:01:51Z","title":"Mechanistic Interpretability for AI Safety -- A Review"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2404.14082","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c41c18a1bbdca6f616b28dc40f2172225122c5cf947cd31e20d5c0521899f7cc","target":"record","created_at":"2026-05-22T14:16:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2ac7b25eafee0ebcde1ec8556a2bace039c5d31e1cd3289e24faedb5d18c2d12","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2024-04-22T11:01:51Z","title_canon_sha256":"d09de4d41aef7c9cb9e614c76e9bcd3bc085b40b144c35c5feda348312f6f4d9"},"schema_version":"1.0","source":{"id":"2404.14082","kind":"arxiv","version":3}},"canonical_sha256":"40355de432a3f29a86779d53f6286e84a66de360acc42cc846275ad9ae7a958d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"40355de432a3f29a86779d53f6286e84a66de360acc42cc846275ad9ae7a958d","first_computed_at":"2026-05-22T14:16:37.901546Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T14:16:37.901546Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"cOHZIp2+IWtMtnBbg6PZcUxHFGCKOfa7E1ryefoKNS9oyok/gX5ZhHp58hBYdtBSbCq6ntCPxsYyjViG7EoIDQ==","signature_status":"signed_v1","signed_at":"2026-05-22T14:16:37.904289Z","signed_message":"canonical_sha256_bytes"},"source_id":"2404.14082","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c41c18a1bbdca6f616b28dc40f2172225122c5cf947cd31e20d5c0521899f7cc","sha256:affe5f97c87c4ac2117da94ecc43496e3e9196c81bd9c365ed654ca0a225b221"],"state_sha256":"0d208f87539d50721f56f316c27b998b039c0c53aab33a1fe1eb6312a9ddcafe"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"s0ZDdYHK/6JVdNPgrFmKLNj+82F+e3Vdr4qOZ4MOekEhI02rV+5lDQCe5UWyWc8Ju89zNsVs8wbwXaiJZ1EMBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T15:39:14.130749Z","bundle_sha256":"b197feb4c2b53ef7366cdebec8dc1c2ce7e7e9014eda049d7fad418f2d091615"}}