{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z","short_pith_number":"pith:7ZUAV4UX","schema_version":"1.0","canonical_sha256":"fe680af2979f9b3f3ec253dbbc9f90e655bd4488c66a7fb4f81b6470bfddb612","source":{"kind":"arxiv","id":"2605.30777","version":1},"attestation_state":"computed","paper":{"title":"What Breaks When LLMs Code? Characterizing Operational Safety Failures of Agentic Code Assistants","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Alif Al Hasan, Sumon Biswas","submitted_at":"2026-05-29T03:09:37Z","abstract_excerpt":"Autonomous coding agents built on large language models (LLMs) are rapidly being integrated into development workflows, yet their operational safety properties remain poorly understood beyond evaluations of explicitly malicious inputs. In practice, high-impact failures arise during benign, goal-directed use through environment breakage, fabricated success reports, etc. that current benchmarks do not capture. What categories of operational safety failures actually occur when coding agents are used for everyday development tasks and what is their impact? We present an incident-driven empirical s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.30777","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-29T03:09:37Z","cross_cats_sorted":[],"title_canon_sha256":"c04efcfa83527d0d97482356b98925a8d188c36253d938e862a1c4d2a1ad7fbf","abstract_canon_sha256":"ccbbb89bef73fc20afe85158a58cb49e10e62abd939696896ce9e6e7923ab0f1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:03:15.982704Z","signature_b64":"UA9mFmZUFRerfQKsnVSnIRJvosZdYRDxENyQ6rPvhuQiNxI3f6i+ob23Ti98MCjxPRWIce239C6+rZgxMfjtBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fe680af2979f9b3f3ec253dbbc9f90e655bd4488c66a7fb4f81b6470bfddb612","last_reissued_at":"2026-06-01T01:03:15.982102Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:03:15.982102Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"What Breaks When LLMs Code? Characterizing Operational Safety Failures of Agentic Code Assistants","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Alif Al Hasan, Sumon Biswas","submitted_at":"2026-05-29T03:09:37Z","abstract_excerpt":"Autonomous coding agents built on large language models (LLMs) are rapidly being integrated into development workflows, yet their operational safety properties remain poorly understood beyond evaluations of explicitly malicious inputs. In practice, high-impact failures arise during benign, goal-directed use through environment breakage, fabricated success reports, etc. that current benchmarks do not capture. What categories of operational safety failures actually occur when coding agents are used for everyday development tasks and what is their impact? We present an incident-driven empirical s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30777","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30777/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.30777","created_at":"2026-06-01T01:03:15.982200+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.30777v1","created_at":"2026-06-01T01:03:15.982200+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30777","created_at":"2026-06-01T01:03:15.982200+00:00"},{"alias_kind":"pith_short_12","alias_value":"7ZUAV4UXT6NT","created_at":"2026-06-01T01:03:15.982200+00:00"},{"alias_kind":"pith_short_16","alias_value":"7ZUAV4UXT6NT6PWC","created_at":"2026-06-01T01:03:15.982200+00:00"},{"alias_kind":"pith_short_8","alias_value":"7ZUAV4UX","created_at":"2026-06-01T01:03:15.982200+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z","json":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z.json","graph_json":"https://pith.science/api/pith-number/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/graph.json","events_json":"https://pith.science/api/pith-number/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/events.json","paper":"https://pith.science/paper/7ZUAV4UX"},"agent_actions":{"view_html":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z","download_json":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z.json","view_paper":"https://pith.science/paper/7ZUAV4UX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.30777&json=true","fetch_graph":"https://pith.science/api/pith-number/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/graph.json","fetch_events":"https://pith.science/api/pith-number/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/action/storage_attestation","attest_author":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/action/author_attestation","sign_citation":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/action/citation_signature","submit_replication":"https://pith.science/pith/7ZUAV4UXT6NT6PWCKPN3ZH4Q4Z/action/replication_record"}},"created_at":"2026-06-01T01:03:15.982200+00:00","updated_at":"2026-06-01T01:03:15.982200+00:00"}