{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:6IV7I75EBOVETGY7TKL7GUAVOV","short_pith_number":"pith:6IV7I75E","canonical_record":{"source":{"id":"2605.06890","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T19:47:30Z","cross_cats_sorted":["cs.MA"],"title_canon_sha256":"9395ffb1953fc09f81fb0ddd480e4d129d1955cd4e21c6ca6ccc7d41dd643e8d","abstract_canon_sha256":"7bc38ef75e145719bbda9943dbdd8387887e39843ae163fd3d16e1e9ed558bb4"},"schema_version":"1.0"},"canonical_sha256":"f22bf47fa40baa499b1f9a97f350157571fb79107dc989c4b39993cc3cac4990","source":{"kind":"arxiv","id":"2605.06890","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.06890","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"arxiv_version","alias_value":"2605.06890v2","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.06890","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"pith_short_12","alias_value":"6IV7I75EBOVE","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"pith_short_16","alias_value":"6IV7I75EBOVETGY7","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"pith_short_8","alias_value":"6IV7I75E","created_at":"2026-05-22T01:03:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:6IV7I75EBOVETGY7TKL7GUAVOV","target":"record","payload":{"canonical_record":{"source":{"id":"2605.06890","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T19:47:30Z","cross_cats_sorted":["cs.MA"],"title_canon_sha256":"9395ffb1953fc09f81fb0ddd480e4d129d1955cd4e21c6ca6ccc7d41dd643e8d","abstract_canon_sha256":"7bc38ef75e145719bbda9943dbdd8387887e39843ae163fd3d16e1e9ed558bb4"},"schema_version":"1.0"},"canonical_sha256":"f22bf47fa40baa499b1f9a97f350157571fb79107dc989c4b39993cc3cac4990","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:03:19.956255Z","signature_b64":"dCrSOkfDN1pIQktloCzhZzSOR93Qg5zSJSlAwA3rxhCeqLej0adN3/djV0XWo3FhlBa42oWmoknMnZml83GMBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f22bf47fa40baa499b1f9a97f350157571fb79107dc989c4b39993cc3cac4990","last_reissued_at":"2026-05-22T01:03:19.955494Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:03:19.955494Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.06890","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1xdKJEMEJh4lzoK2bX2FXHcGEzlUHEI+JMRvPIgiXF9ZLmeBeMkNBxSQa5WFQ5Q5onzcWU9O3U6rji5vdgdMCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T18:20:55.642019Z"},"content_sha256":"e422dcb9beff45ef2dace75af9244d6e9bf688666ee57f8f26913793f7276f4a","schema_version":"1.0","event_id":"sha256:e422dcb9beff45ef2dace75af9244d6e9bf688666ee57f8f26913793f7276f4a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:6IV7I75EBOVETGY7TKL7GUAVOV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond the Black Box: Interpretability of Agentic AI Tool Use","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A toolkit of sparse autoencoders and linear probes can identify the internal features that drive tool-use decisions inside AI agents before they act.","cross_cats":["cs.MA"],"primary_cat":"cs.AI","authors_text":"Ariye Shater, Hariom Tatsat","submitted_at":"2026-05-07T19:47:30Z","abstract_excerpt":"AI agents are promising for high-stakes enterprise workflows, but dependable deployment remains limited because tool-use failures are difficult to diagnose and control. Agents may skip required tool calls, invoke tools unnecessarily, or take actions whose consequence becomes visible only after execution. Existing observability methods are mostly external: prompts reveal correlations, evaluations score outputs, and logs arrive only after the model has already acted. In long-horizon settings, these failures are especially costly because an early tool mistake can alter the rest of the trajectory,"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"By decomposing activations into sparse features, it identifies the internal layers and features most associated with tool decisions and tests their functional importance through feature ablation.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the sparse features extracted by SAEs and the predictions from linear probes correspond to causally relevant internal representations of tool-use decisions rather than spurious correlations.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A mechanistic interpretability toolkit with SAEs and probes enables pre-action inference of tool decisions in AI agents trained on function-calling trajectories.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A toolkit of sparse autoencoders and linear probes can identify the internal features that drive tool-use decisions inside AI agents before they act.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"91549ee9599e07208fb0e08a84b341547c5d266aadd30befde987affaee6359e"},"source":{"id":"2605.06890","kind":"arxiv","version":2},"verdict":{"id":"dc75fb37-1a98-4b13-b456-17d1ff59dff4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-11T01:12:09.997281Z","strongest_claim":"By decomposing activations into sparse features, it identifies the internal layers and features most associated with tool decisions and tests their functional importance through feature ablation.","one_line_summary":"A mechanistic interpretability toolkit with SAEs and probes enables pre-action inference of tool decisions in AI agents trained on function-calling trajectories.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the sparse features extracted by SAEs and the predictions from linear probes correspond to causally relevant internal representations of tool-use decisions rather than spurious correlations.","pith_extraction_headline":"A toolkit of sparse autoencoders and linear probes can identify the internal features that drive tool-use decisions inside AI agents before they act."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.06890/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-20T12:02:03.672718Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-20T06:41:06.333495Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T17:31:19.518942Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T12:19:57.092328Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"651427d9a7f94981ac462f83f2543f388e9393f9661be14195ffcd3cce826df4"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f854a256f40181a2b3dcb8b7dfd4e22f38dbe979420ff29b0c0cde7ba89cc23f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"dc75fb37-1a98-4b13-b456-17d1ff59dff4"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PoKzdoXQt/pyyG/PV+G+ks1btpXRvQghZ/8UJRE3/p7uMmkh0ajyy4nBF8UCYxZQVoDKAIS4FzhbRMNkKy2mBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T18:20:55.642525Z"},"content_sha256":"e236f4f095750937b12a9f821cca657e9194cff2fd681d2df613dcacf2efbc5a","schema_version":"1.0","event_id":"sha256:e236f4f095750937b12a9f821cca657e9194cff2fd681d2df613dcacf2efbc5a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6IV7I75EBOVETGY7TKL7GUAVOV/bundle.json","state_url":"https://pith.science/pith/6IV7I75EBOVETGY7TKL7GUAVOV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6IV7I75EBOVETGY7TKL7GUAVOV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T18:20:55Z","links":{"resolver":"https://pith.science/pith/6IV7I75EBOVETGY7TKL7GUAVOV","bundle":"https://pith.science/pith/6IV7I75EBOVETGY7TKL7GUAVOV/bundle.json","state":"https://pith.science/pith/6IV7I75EBOVETGY7TKL7GUAVOV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6IV7I75EBOVETGY7TKL7GUAVOV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:6IV7I75EBOVETGY7TKL7GUAVOV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7bc38ef75e145719bbda9943dbdd8387887e39843ae163fd3d16e1e9ed558bb4","cross_cats_sorted":["cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T19:47:30Z","title_canon_sha256":"9395ffb1953fc09f81fb0ddd480e4d129d1955cd4e21c6ca6ccc7d41dd643e8d"},"schema_version":"1.0","source":{"id":"2605.06890","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.06890","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"arxiv_version","alias_value":"2605.06890v2","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.06890","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"pith_short_12","alias_value":"6IV7I75EBOVE","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"pith_short_16","alias_value":"6IV7I75EBOVETGY7","created_at":"2026-05-22T01:03:19Z"},{"alias_kind":"pith_short_8","alias_value":"6IV7I75E","created_at":"2026-05-22T01:03:19Z"}],"graph_snapshots":[{"event_id":"sha256:e236f4f095750937b12a9f821cca657e9194cff2fd681d2df613dcacf2efbc5a","target":"graph","created_at":"2026-05-22T01:03:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"By decomposing activations into sparse features, it identifies the internal layers and features most associated with tool decisions and tests their functional importance through feature ablation."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the sparse features extracted by SAEs and the predictions from linear probes correspond to causally relevant internal representations of tool-use decisions rather than spurious correlations."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A mechanistic interpretability toolkit with SAEs and probes enables pre-action inference of tool decisions in AI agents trained on function-calling trajectories."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A toolkit of sparse autoencoders and linear probes can identify the internal features that drive tool-use decisions inside AI agents before they act."}],"snapshot_sha256":"91549ee9599e07208fb0e08a84b341547c5d266aadd30befde987affaee6359e"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f854a256f40181a2b3dcb8b7dfd4e22f38dbe979420ff29b0c0cde7ba89cc23f"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-20T12:02:03.672718Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T06:41:06.333495Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T17:31:19.518942Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T12:19:57.092328Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.06890/integrity.json","findings":[],"snapshot_sha256":"651427d9a7f94981ac462f83f2543f388e9393f9661be14195ffcd3cce826df4","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"AI agents are promising for high-stakes enterprise workflows, but dependable deployment remains limited because tool-use failures are difficult to diagnose and control. Agents may skip required tool calls, invoke tools unnecessarily, or take actions whose consequence becomes visible only after execution. Existing observability methods are mostly external: prompts reveal correlations, evaluations score outputs, and logs arrive only after the model has already acted. In long-horizon settings, these failures are especially costly because an early tool mistake can alter the rest of the trajectory,","authors_text":"Ariye Shater, Hariom Tatsat","cross_cats":["cs.MA"],"headline":"A toolkit of sparse autoencoders and linear probes can identify the internal features that drive tool-use decisions inside AI agents before they act.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T19:47:30Z","title":"Beyond the Black Box: Interpretability of Agentic AI Tool Use"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.06890","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-11T01:12:09.997281Z","id":"dc75fb37-1a98-4b13-b456-17d1ff59dff4","model_set":{"reader":"grok-4.3"},"one_line_summary":"A mechanistic interpretability toolkit with SAEs and probes enables pre-action inference of tool decisions in AI agents trained on function-calling trajectories.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A toolkit of sparse autoencoders and linear probes can identify the internal features that drive tool-use decisions inside AI agents before they act.","strongest_claim":"By decomposing activations into sparse features, it identifies the internal layers and features most associated with tool decisions and tests their functional importance through feature ablation.","weakest_assumption":"That the sparse features extracted by SAEs and the predictions from linear probes correspond to causally relevant internal representations of tool-use decisions rather than spurious correlations."}},"verdict_id":"dc75fb37-1a98-4b13-b456-17d1ff59dff4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e422dcb9beff45ef2dace75af9244d6e9bf688666ee57f8f26913793f7276f4a","target":"record","created_at":"2026-05-22T01:03:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7bc38ef75e145719bbda9943dbdd8387887e39843ae163fd3d16e1e9ed558bb4","cross_cats_sorted":["cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T19:47:30Z","title_canon_sha256":"9395ffb1953fc09f81fb0ddd480e4d129d1955cd4e21c6ca6ccc7d41dd643e8d"},"schema_version":"1.0","source":{"id":"2605.06890","kind":"arxiv","version":2}},"canonical_sha256":"f22bf47fa40baa499b1f9a97f350157571fb79107dc989c4b39993cc3cac4990","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f22bf47fa40baa499b1f9a97f350157571fb79107dc989c4b39993cc3cac4990","first_computed_at":"2026-05-22T01:03:19.955494Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:03:19.955494Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dCrSOkfDN1pIQktloCzhZzSOR93Qg5zSJSlAwA3rxhCeqLej0adN3/djV0XWo3FhlBa42oWmoknMnZml83GMBg==","signature_status":"signed_v1","signed_at":"2026-05-22T01:03:19.956255Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.06890","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e422dcb9beff45ef2dace75af9244d6e9bf688666ee57f8f26913793f7276f4a","sha256:e236f4f095750937b12a9f821cca657e9194cff2fd681d2df613dcacf2efbc5a"],"state_sha256":"987019757d5baf5c05ac5a69542cf0fea0f6bec1f6c05ea51db87096dfa5ad64"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"x3ODUoDA8MJ46ncb3/igu5ef9hCOBFODpitb2f/rWpAi7Wu1dBNoYYXrbhNEBW9azfnSaV0x7OTPqJ/fslcOAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T18:20:55.644768Z","bundle_sha256":"f0a623e2d9e1eb3e936768ab62232fe5f43c9aae2dd6019cd8c6c8d7c2a06c61"}}