{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:2AZN4T3IWWHUL2Z2HS2QUGTQK2","short_pith_number":"pith:2AZN4T3I","schema_version":"1.0","canonical_sha256":"d032de4f68b58f45eb3a3cb50a1a7056af15fe195c74d2401ade832940e11fe5","source":{"kind":"arxiv","id":"2406.09187","version":3},"attestation_state":"computed","paper":{"title":"GuardAgent: Safeguard LLM Agents by a Guard Agent via Knowledge-Enabled Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bo Li, Carl Yang, Chulin Xie, Dawn Song, Han Xie, Jiawei Zhang, Junyuan Hong, Linzhi Zheng, Qinbin Li, Yanjie Li, Zhen Xiang, Zidi Xiong","submitted_at":"2024-06-13T14:49:26Z","abstract_excerpt":"The rapid advancement of large language model (LLM) agents has raised new concerns regarding their safety and security. In this paper, we propose GuardAgent, the first guardrail agent to protect target agents by dynamically checking whether their actions satisfy given safety guard requests. Specifically, GuardAgent first analyzes the safety guard requests to generate a task plan, and then maps this plan into guardrail code for execution. By performing the code execution, GuardAgent can deterministically follow the safety guard request and safeguard target agents. In both steps, an LLM is utili"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2406.09187","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-06-13T14:49:26Z","cross_cats_sorted":[],"title_canon_sha256":"07c4cdd18810533b08dc1b9e4481ad6fda860ff0b013ea4f7a340e9617993759","abstract_canon_sha256":"eaa3461325f7ccca9410d7664fa9835a76907e85ffceb8fd6a5449e7761e75ac"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:52:24.270754Z","signature_b64":"FP3Q5lHDjDkfWbT20arlx68re34RjLcIOIj60n6DF409Q7KQfiwfXbbffx3m3Bb4PY/OJBhHNN2ev6GkkcnwBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d032de4f68b58f45eb3a3cb50a1a7056af15fe195c74d2401ade832940e11fe5","last_reissued_at":"2026-05-21T01:52:24.267505Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:52:24.267505Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GuardAgent: Safeguard LLM Agents by a Guard Agent via Knowledge-Enabled Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bo Li, Carl Yang, Chulin Xie, Dawn Song, Han Xie, Jiawei Zhang, Junyuan Hong, Linzhi Zheng, Qinbin Li, Yanjie Li, Zhen Xiang, Zidi Xiong","submitted_at":"2024-06-13T14:49:26Z","abstract_excerpt":"The rapid advancement of large language model (LLM) agents has raised new concerns regarding their safety and security. In this paper, we propose GuardAgent, the first guardrail agent to protect target agents by dynamically checking whether their actions satisfy given safety guard requests. Specifically, GuardAgent first analyzes the safety guard requests to generate a task plan, and then maps this plan into guardrail code for execution. By performing the code execution, GuardAgent can deterministically follow the safety guard request and safeguard target agents. In both steps, an LLM is utili"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2406.09187","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2406.09187/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2406.09187","created_at":"2026-05-21T01:52:24.267629+00:00"},{"alias_kind":"arxiv_version","alias_value":"2406.09187v3","created_at":"2026-05-21T01:52:24.267629+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.09187","created_at":"2026-05-21T01:52:24.267629+00:00"},{"alias_kind":"pith_short_12","alias_value":"2AZN4T3IWWHU","created_at":"2026-05-21T01:52:24.267629+00:00"},{"alias_kind":"pith_short_16","alias_value":"2AZN4T3IWWHUL2Z2","created_at":"2026-05-21T01:52:24.267629+00:00"},{"alias_kind":"pith_short_8","alias_value":"2AZN4T3I","created_at":"2026-05-21T01:52:24.267629+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":21,"internal_anchor_count":21,"sample":[{"citing_arxiv_id":"2605.16265","citing_title":"AgentWall: A Runtime Safety Layer for Local AI Agents","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16282","citing_title":"Taxonomy and Consistency Analysis of Safety Benchmarks for AI Agents","ref_index":56,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17380","citing_title":"ADR: An Agentic Detection System for Enterprise Agentic AI Security","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19940","citing_title":"Robotics-Inspired Guardrails for Foundation Models in Socially Sensitive Domains","ref_index":52,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20173","citing_title":"A Methodology for Selecting and Composing Runtime Architecture Patterns for Production LLM Agents","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2509.06921","citing_title":"Neuro-Symbolic AI for Cybersecurity: State of the Art, Challenges, and Opportunities","ref_index":161,"is_internal_anchor":true},{"citing_arxiv_id":"2510.23883","citing_title":"Agentic AI Security: Threats, Defenses, Evaluation, and Open Challenges","ref_index":205,"is_internal_anchor":true},{"citing_arxiv_id":"2601.18842","citing_title":"GUIGuard-Bench: Toward a General Evaluation for Privacy-Preserving GUI Agents","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2602.16708","citing_title":"Formal Policy Enforcement for Real-World Agentic Systems","ref_index":65,"is_internal_anchor":true},{"citing_arxiv_id":"2603.00991","citing_title":"Tracking Capabilities for Safer Agents","ref_index":80,"is_internal_anchor":true},{"citing_arxiv_id":"2503.18666","citing_title":"AgentSpec: Customizable Runtime Enforcement for Safe and Reliable LLM Agents","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2410.09024","citing_title":"AgentHarm: A Benchmark for Measuring Harmfulness of LLM Agents","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11039","citing_title":"The Granularity Mismatch in Agent Security: Argument-Level Provenance Solves Enforcement and Isolates the LLM Reasoning Bottleneck","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09278","citing_title":"EquiMem: Calibrating Shared Memory in Multi-Agent Debate via Game-Theoretic Equilibrium","ref_index":81,"is_internal_anchor":true},{"citing_arxiv_id":"2604.25562","citing_title":"SnapGuard: Lightweight Prompt Injection Detection for Screenshot-Based Web Agents","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05704","citing_title":"SafeHarbor: Hierarchical Memory-Augmented Guardrail for LLM Agent Safety","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00741","citing_title":"Self-Adaptive Multi-Agent LLM-Based Security Pattern Selection for IoT Systems","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07833","citing_title":"Harnessing Embodied Agents: Runtime Governance for Policy-Constrained Execution","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2604.05119","citing_title":"Governance-Aware Agent Telemetry for Closed-Loop Enforcement in Multi-Agent AI Systems","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2604.15505","citing_title":"PolicyBank: Evolving Policy Understanding for LLM Agents","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.15579","citing_title":"Symbolic Guardrails for Domain-Specific Agents: Stronger Safety and Security Guarantees Without Sacrificing Utility","ref_index":72,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2","json":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2.json","graph_json":"https://pith.science/api/pith-number/2AZN4T3IWWHUL2Z2HS2QUGTQK2/graph.json","events_json":"https://pith.science/api/pith-number/2AZN4T3IWWHUL2Z2HS2QUGTQK2/events.json","paper":"https://pith.science/paper/2AZN4T3I"},"agent_actions":{"view_html":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2","download_json":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2.json","view_paper":"https://pith.science/paper/2AZN4T3I","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2406.09187&json=true","fetch_graph":"https://pith.science/api/pith-number/2AZN4T3IWWHUL2Z2HS2QUGTQK2/graph.json","fetch_events":"https://pith.science/api/pith-number/2AZN4T3IWWHUL2Z2HS2QUGTQK2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2/action/storage_attestation","attest_author":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2/action/author_attestation","sign_citation":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2/action/citation_signature","submit_replication":"https://pith.science/pith/2AZN4T3IWWHUL2Z2HS2QUGTQK2/action/replication_record"}},"created_at":"2026-05-21T01:52:24.267629+00:00","updated_at":"2026-05-21T01:52:24.267629+00:00"}