{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YDSBATVFRPYN5JTS7XUHBQFIUK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7f7fb96c53514b14edd2a12752b1ff05dd1015160ffb9aa1726cd07e62dd9701","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-28T15:39:43Z","title_canon_sha256":"e509491078bb856896a1094b683cb1d53890c1c6017e99f68bde6b58132a78a8"},"schema_version":"1.0","source":{"id":"2605.30096","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30096","created_at":"2026-05-29T02:06:09Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30096v1","created_at":"2026-05-29T02:06:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30096","created_at":"2026-05-29T02:06:09Z"},{"alias_kind":"pith_short_12","alias_value":"YDSBATVFRPYN","created_at":"2026-05-29T02:06:09Z"},{"alias_kind":"pith_short_16","alias_value":"YDSBATVFRPYN5JTS","created_at":"2026-05-29T02:06:09Z"},{"alias_kind":"pith_short_8","alias_value":"YDSBATVF","created_at":"2026-05-29T02:06:09Z"}],"graph_snapshots":[{"event_id":"sha256:b774c8b4e295ac534a62648655fcbf60a100949eb684deaaac9222dffc292ad8","target":"graph","created_at":"2026-05-29T02:06:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30096/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models (LLMs) can autonomously conduct multi-stage cyber attacks, but the consistency of their offensive behavior under repeated trials remains unstudied. This work presents the first large-scale empirical measurement of LLM attack consistency: 400 autonomous penetration testing runs (4 models, 100 each) against an identical honeypot hosting OWASP Juice Shop and two additional vulnerable services, holding prompt, orchestrator, and target constant. No model emitted a content refusal that survived the orchestrator's one-shot authorization re-prompt at iterations 0-1. Claude Sonnet","authors_text":"Galip Tolga Erdem","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-28T15:39:43Z","title":"How Reliable Are AI Attackers Against a Fixed Vulnerable Target? A 400-Run Empirical Study of LLM Penetration Testing Consistency"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30096","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ac0a1bfc33967caadc4c5ab71c03614d35e750a907920b13f76551e401f798f","target":"record","created_at":"2026-05-29T02:06:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7f7fb96c53514b14edd2a12752b1ff05dd1015160ffb9aa1726cd07e62dd9701","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-28T15:39:43Z","title_canon_sha256":"e509491078bb856896a1094b683cb1d53890c1c6017e99f68bde6b58132a78a8"},"schema_version":"1.0","source":{"id":"2605.30096","kind":"arxiv","version":1}},"canonical_sha256":"c0e4104ea58bf0dea672fde870c0a8a28e96c3d137b3b0f3c7d2882335f038e8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c0e4104ea58bf0dea672fde870c0a8a28e96c3d137b3b0f3c7d2882335f038e8","first_computed_at":"2026-05-29T02:06:09.771664Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T02:06:09.771664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"InZjWIQdcqg6l6kOXMSk3z5dkfv+3s9Si3m+Ueqci3HjfYNYovE4azwTrmh9PphDRyC0zpgve7xpXjBCvinuAA==","signature_status":"signed_v1","signed_at":"2026-05-29T02:06:09.772039Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30096","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5ac0a1bfc33967caadc4c5ab71c03614d35e750a907920b13f76551e401f798f","sha256:b774c8b4e295ac534a62648655fcbf60a100949eb684deaaac9222dffc292ad8"],"state_sha256":"61a68670f79da06fd3b7477266f7c125c3805b718c05d61865bb3f52d26e13b0"}