{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:3QL3UTVI5NNSJZEZL6K43HKMTT","short_pith_number":"pith:3QL3UTVI","schema_version":"1.0","canonical_sha256":"dc17ba4ea8eb5b24e4995f95cd9d4c9ce260c28cb46dede4643183e7a1c32ee4","source":{"kind":"arxiv","id":"2510.03992","version":2},"attestation_state":"computed","paper":{"title":"Quantitative Certification of Agentic Tool Selection","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Gagandeep Singh, Isha Chaudhary, Jehyeok Yeon","submitted_at":"2025-10-05T01:50:34Z","abstract_excerpt":"Large language models (LLMs) are increasingly deployed in agentic systems, where a fundamental task is mapping user intents to relevant external tools. Errors in tool selection can have severe outcomes, such as unauthorized data access, even without modifying the agent's underlying model. Existing evaluations measure performance on curated, benign benchmarks. However, a pipeline's behavior in deployment depends on the tool pool the agent actually encounters, which in open registries is shaped by third parties. We introduce LLMCert-T, the first statistical framework that returns \\textbf{high-co"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.03992","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2025-10-05T01:50:34Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f617f75f7fa7a9b8796c3437efc2168f5bd4b588f349b29edb8652e96fff990b","abstract_canon_sha256":"6f4033db1833d3fb25cc6fb95d594485bd84f91343d095a1507edfe5f876a9e5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:33.143030Z","signature_b64":"j0XK52q9PQz1QtaYXC9l94HQjOWRtTNUB4fXov4uraRtcJy3FJzHZ0UwudAD8vWqznwObO8ijR4WX9BPBIZcAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dc17ba4ea8eb5b24e4995f95cd9d4c9ce260c28cb46dede4643183e7a1c32ee4","last_reissued_at":"2026-05-18T02:44:33.142463Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:33.142463Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Quantitative Certification of Agentic Tool Selection","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Gagandeep Singh, Isha Chaudhary, Jehyeok Yeon","submitted_at":"2025-10-05T01:50:34Z","abstract_excerpt":"Large language models (LLMs) are increasingly deployed in agentic systems, where a fundamental task is mapping user intents to relevant external tools. Errors in tool selection can have severe outcomes, such as unauthorized data access, even without modifying the agent's underlying model. Existing evaluations measure performance on curated, benign benchmarks. However, a pipeline's behavior in deployment depends on the tool pool the agent actually encounters, which in open registries is shaped by third parties. We introduce LLMCert-T, the first statistical framework that returns \\textbf{high-co"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.03992","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.03992","created_at":"2026-05-18T02:44:33.142559+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.03992v2","created_at":"2026-05-18T02:44:33.142559+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.03992","created_at":"2026-05-18T02:44:33.142559+00:00"},{"alias_kind":"pith_short_12","alias_value":"3QL3UTVI5NNS","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"3QL3UTVI5NNSJZEZ","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"3QL3UTVI","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2512.05439","citing_title":"BEAVER: An Efficient Deterministic LLM Verifier","ref_index":59,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11781","citing_title":"Five Attacks on x402 Agentic Payment Protocol","ref_index":33,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT","json":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT.json","graph_json":"https://pith.science/api/pith-number/3QL3UTVI5NNSJZEZL6K43HKMTT/graph.json","events_json":"https://pith.science/api/pith-number/3QL3UTVI5NNSJZEZL6K43HKMTT/events.json","paper":"https://pith.science/paper/3QL3UTVI"},"agent_actions":{"view_html":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT","download_json":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT.json","view_paper":"https://pith.science/paper/3QL3UTVI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.03992&json=true","fetch_graph":"https://pith.science/api/pith-number/3QL3UTVI5NNSJZEZL6K43HKMTT/graph.json","fetch_events":"https://pith.science/api/pith-number/3QL3UTVI5NNSJZEZL6K43HKMTT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT/action/storage_attestation","attest_author":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT/action/author_attestation","sign_citation":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT/action/citation_signature","submit_replication":"https://pith.science/pith/3QL3UTVI5NNSJZEZL6K43HKMTT/action/replication_record"}},"created_at":"2026-05-18T02:44:33.142559+00:00","updated_at":"2026-05-18T02:44:33.142559+00:00"}