{"paper":{"title":"Do Biological Structural Guarantees Earn Their Complexity?","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Biological structural guarantees in AI agents require empirical tests to see if they earn their complexity.","cross_cats":["cs.AI"],"primary_cat":"q-bio.QM","authors_text":"Bogdan Banu","submitted_at":"2026-05-13T09:36:04Z","abstract_excerpt":"Biologically-inspired AI agent frameworks claim reliability benefits through structural guarantees adapted from gene regulatory networks, immune systems, and metabolic control. These claims are rarely tested empirically against simpler alternatives. We present three deep benchmarks: metabolic priority gating, autoinducer-based quorum sensing, and Bayesian stagnation detection, each comparing a biologically-grounded implementation against a naive non-biological alternative and an ablated control, across 1,000 trials per seed and 10 seeds (10M+ data points total)."},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Biologically-inspired AI agent frameworks claim reliability benefits through structural guarantees adapted from gene regulatory networks, immune systems, and metabolic control, but these claims are rarely tested empirically against simpler alternatives.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the three chosen benchmarks (metabolic priority gating, autoinducer-based quorum sensing, and Bayesian stagnation detection) are fair and representative tests of the reliability benefits claimed for biological structural guarantees.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Empirical head-to-head comparison of biologically-grounded AI agent implementations against naive alternatives and ablated controls in three benchmarks across 10 million data 
points.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Biological structural guarantees in AI agents require empirical tests to see if they earn their complexity.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"08490f593414391cf0dd10791c9dbd0fac7a21e070c2d644aef9fc1e82267fbf"},"source":{"id":"2605.15225","kind":"arxiv","version":1},"verdict":{"id":"c386bd04-1888-4830-808c-999483c885a2","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T17:49:48.026012Z","strongest_claim":"Biologically-inspired AI agent frameworks claim reliability benefits through structural guarantees adapted from gene regulatory networks, immune systems, and metabolic control, but these claims are rarely tested empirically against simpler alternatives.","one_line_summary":"Empirical head-to-head comparison of biologically-grounded AI agent implementations against naive alternatives and ablated controls in three benchmarks across 10 million data points.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the three chosen benchmarks (metabolic priority gating, autoinducer-based quorum sensing, and Bayesian stagnation detection) are fair and representative tests of the reliability benefits claimed for biological structural guarantees.","pith_extraction_headline":"Biological structural guarantees in AI agents require empirical tests to see if they earn their 
complexity."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15225/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T20:01:57.016676Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T18:01:22.856629Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T18:01:18.622276Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.833519Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"71deb3c3f246b58961f7d797cdd8ab6b42c5fe303158849d8bc8dbb8295a49f4"},"references":{"count":12,"sample":[{"doi":"","year":2026,"title":"Delegation in multi-agent systems: When and how can delegating to networked agents beat the single best agent? arXiv preprint arXiv:2603.26993, 2026","work_id":"a1afaa28-2fed-4f13-b712-da6ff7ec8a84","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"Convergence by composition: A structured adapter architecture for multi-agent system integration, 2026","work_id":"75227bba-b8d4-4069-8017-76a01a9888a8","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"Harness Engineering as Categorical Architecture","work_id":"f2aa3ca7-3d98-4e08-b220-ea8b4a7b882c","ref_index":3,"cited_arxiv_id":"2605.12239","is_internal_anchor":true},{"doi":"","year":2026,"title":"Operon: Biomimetic wiring diagrams for robust agentic systems. https://github.com/coredipper/operon, 2026","work_id":"19fccba5-4b0d-48e4-a291-02a585c570ec","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1999,"title":"Oxford University Press, 
1999","work_id":"d173fe15-c04a-40d6-957a-d8589045f752","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":12,"snapshot_sha256":"bd39b4d57b235ab72d3f37455cd4c587b05cac08b13966723c84e963cf1f72fd","internal_anchors":4},"formal_canon":{"evidence_count":2,"snapshot_sha256":"45e0c29c24e4381ad2b6e941c66718f39be1177e64c71521e02d90dff8739798"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"}