{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:N4KZ2OBIRTN5N3JZNDSUEJ32MB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e07d5247f2ccda85101ec29a435e4016fbd108ec239017796ddec4550acc8d87","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-14T18:00:30Z","title_canon_sha256":"0533a52c9c6ddc4216dc7de3c863b7574648ef8b55b1f10de75e9f6d13384a85"},"schema_version":"1.0","source":{"id":"2605.15281","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15281","created_at":"2026-05-20T00:00:50Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15281v1","created_at":"2026-05-20T00:00:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15281","created_at":"2026-05-20T00:00:50Z"},{"alias_kind":"pith_short_12","alias_value":"N4KZ2OBIRTN5","created_at":"2026-05-20T00:00:50Z"},{"alias_kind":"pith_short_16","alias_value":"N4KZ2OBIRTN5N3JZ","created_at":"2026-05-20T00:00:50Z"},{"alias_kind":"pith_short_8","alias_value":"N4KZ2OBI","created_at":"2026-05-20T00:00:50Z"}],"graph_snapshots":[{"event_id":"sha256:24225d2f3a98f4a94b6ea75436e4c7d8ef28d5f09af68eb363797a94d8bb8001","target":"graph","created_at":"2026-05-20T00:00:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Evaluated across four production applications and 176 scenarios, the framework improves script generation success from 55% to 93%, achieves an 8x reduction in navigation failures, eliminates 80% of timing-related race conditions, and reduces test creation time by 75% compared to manual Selenium authoring. It detects 85% of authentication bypass vulnerabilities and 95% of input validation flaws with false positive rates below 12%."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The four production applications and 176 scenarios used for evaluation are representative of typical web applications and that natural-language descriptions of attack scenarios can be reliably mapped to complete OWASP-aligned probes without missing critical edge cases or introducing bias in the reported detection rates."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"AI agents generate and execute natural-language web tests with built-in security validation, raising success rates and cutting failures in production applications."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"An AI agent framework converts natural-language instructions into reliable web test scripts and OWASP-aligned security probes."}],"snapshot_sha256":"d80ed0b79a292823cf63faf10700447685d4807cbe96ea796bc8cc38507236d8"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T15:31:34.045612Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T15:31:17.914015Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T14:41:54.250330Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.797186Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.15281/integrity.json","findings":[],"snapshot_sha256":"7fc53c4f6aa1336730949671d7ca1605b0ca85d2951c6e833672f7c97ca01e84","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Modern web test suites rot. A UI refactor breaks locators, a timing change causes race conditions, and within weeks developers abandon the suite entirely. This paper presents an AI-driven autonomous testing framework that addresses these failure modes through five integrated strategies - navigation reliability, context-aware selector generation, post-generation validation, smart wait injection, and failure learning - implemented over a containerised worker architecture that decouples orchestration from long-running browser execution. Evaluated across four production applications and 176 scenar","authors_text":"Shrey Tyagi, Siva Rama Krishna Varma Bayyavarapu, Vinil Pasupuleti","cross_cats":["cs.AI"],"headline":"An AI agent framework converts natural-language instructions into reliable web test scripts and OWASP-aligned security probes.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-14T18:00:30Z","title":"Autonomous Intelligent Agents for Natural-Language-Driven Web Execution with Integrated Security Assurance"},"references":{"count":21,"internal_anchors":4,"resolved_work":21,"sample":[{"cited_arxiv_id":"2107.03374","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Reducing Web Test Cases Aging by Means of Robust XPath Locators,","work_id":"96bf3264-554a-4cea-9bd7-5b0cabf0ce71","year":2014},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"OW ASP Testing Guide v4.2,","work_id":"8146fee6-f8de-4d6d-ada7-77a1549e15fc","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"The Tangled Web: A Guide to Securing Modern Web Applications,","work_id":"1682fc12-7326-4fc6-8d78-ed18a161f7e7","year":2011},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Enemy of the State: A State-Aware Black-Box Web Vulnerability Scanner,","work_id":"95da4900-58f1-471d-a9c1-03406e9a961f","year":2012}],"snapshot_sha256":"749340b1eb862ae44695e1e6bb8ee6504ddee1f5edd7d33b90b0402553ec9f5c"},"source":{"id":"2605.15281","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T15:18:20.196900Z","id":"763c3b3e-822a-4865-9c2a-ed9673d1eb6b","model_set":{"reader":"grok-4.3"},"one_line_summary":"AI agents generate and execute natural-language web tests with built-in security validation, raising success rates and cutting failures in production applications.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"An AI agent framework converts natural-language instructions into reliable web test scripts and OWASP-aligned security probes.","strongest_claim":"Evaluated across four production applications and 176 scenarios, the framework improves script generation success from 55% to 93%, achieves an 8x reduction in navigation failures, eliminates 80% of timing-related race conditions, and reduces test creation time by 75% compared to manual Selenium authoring. It detects 85% of authentication bypass vulnerabilities and 95% of input validation flaws with false positive rates below 12%.","weakest_assumption":"The four production applications and 176 scenarios used for evaluation are representative of typical web applications and that natural-language descriptions of attack scenarios can be reliably mapped to complete OWASP-aligned probes without missing critical edge cases or introducing bias in the reported detection rates."}},"verdict_id":"763c3b3e-822a-4865-9c2a-ed9673d1eb6b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6f84feee996773758efc3373edf5aca0a900e7dc8e223f26b82f2e57ee24971d","target":"record","created_at":"2026-05-20T00:00:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e07d5247f2ccda85101ec29a435e4016fbd108ec239017796ddec4550acc8d87","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-14T18:00:30Z","title_canon_sha256":"0533a52c9c6ddc4216dc7de3c863b7574648ef8b55b1f10de75e9f6d13384a85"},"schema_version":"1.0","source":{"id":"2605.15281","kind":"arxiv","version":1}},"canonical_sha256":"6f159d38288cdbd6ed3968e542277a606c3a9018c70d554fc56d073f3c7f8f89","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6f159d38288cdbd6ed3968e542277a606c3a9018c70d554fc56d073f3c7f8f89","first_computed_at":"2026-05-20T00:00:50.510046Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:50.510046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4V2YDhrES556D+D7wIT3t2htISH+FVHIi3ZBlH7PrnRvCSORYWdfOA/SwR4lB80ry6LOMk4RxGAHoROUgku6AA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:50.510780Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.15281","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6f84feee996773758efc3373edf5aca0a900e7dc8e223f26b82f2e57ee24971d","sha256:24225d2f3a98f4a94b6ea75436e4c7d8ef28d5f09af68eb363797a94d8bb8001"],"state_sha256":"8e29962d89d63fb579ad9a4c508d295aa67d19b55291256b8a76458a7e59e960"}