{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:M5LOKMGP6FHGYAWOSX23YLHQTS","short_pith_number":"pith:M5LOKMGP","schema_version":"1.0","canonical_sha256":"6756e530cff14e6c02ce95f5bc2cf09cbd74529bfd29e5cbd4eb28b5d8e3acbb","source":{"kind":"arxiv","id":"2605.17561","version":1},"attestation_state":"computed","paper":{"title":"Automated Root-Cause Subclassification and No-Code Fix Generation for Invalid Bug Reports","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Large language models with retrieval and agent techniques can subclassify root causes of invalid bug reports and generate no-code fixes.","cross_cats":["cs.AI","cs.MA"],"primary_cat":"cs.SE","authors_text":"Emre Dinc, Eray Tuzun, Mahmut Furkan Gon, Tevfik Emre Sungur","submitted_at":"2026-05-17T17:45:13Z","abstract_excerpt":"Issues faced when using software are reported in the form of bug reports. However, many bug reports are invalid, meaning they do not require code changes, and are resolved with a no-code fix. Manually determining the root cause of the invalid bug reports and providing actionable resolutions by the customer support causes a serious waste of resources. Our goal is to introduce a standardized taxonomy for root-cause oriented invalid bug report subclassification, and perform experiments to test the accuracy of various approaches on invalid subclassification and no-code fix generation. We study how"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.17561","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-17T17:45:13Z","cross_cats_sorted":["cs.AI","cs.MA"],"title_canon_sha256":"27e08a3136ee2c5b59ef466e885e339e5464d65982514373f7f1246dd80b0a7f","abstract_canon_sha256":"2c87656401e1d2bb6ef3c6aa23d30ef10d86f2ca6af9035a6d04820ad9c5d51f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:46.005525Z","signature_b64":"n/ItEAtwUOzWMyow3BJP66wJzNsDRY4nvFfg4aZ86TJh6dxFkCTT5Aobtv9BaoZXxRBrSYe3GcKMtVvN+FQzDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6756e530cff14e6c02ce95f5bc2cf09cbd74529bfd29e5cbd4eb28b5d8e3acbb","last_reissued_at":"2026-05-20T00:04:46.004490Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:46.004490Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Automated Root-Cause Subclassification and No-Code Fix Generation for Invalid Bug Reports","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Large language models with retrieval and agent techniques can subclassify root causes of invalid bug reports and generate no-code fixes.","cross_cats":["cs.AI","cs.MA"],"primary_cat":"cs.SE","authors_text":"Emre Dinc, Eray Tuzun, Mahmut Furkan Gon, Tevfik Emre Sungur","submitted_at":"2026-05-17T17:45:13Z","abstract_excerpt":"Issues faced when using software are reported in the form of bug reports. However, many bug reports are invalid, meaning they do not require code changes, and are resolved with a no-code fix. Manually determining the root cause of the invalid bug reports and providing actionable resolutions by the customer support causes a serious waste of resources. Our goal is to introduce a standardized taxonomy for root-cause oriented invalid bug report subclassification, and perform experiments to test the accuracy of various approaches on invalid subclassification and no-code fix generation. We study how"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Retrieval augmented generation achieves the highest overall performance with 0.66 weighted F1 for subclassification of invalid bug reports, while agentic web search achieves the highest overall Judge LLM success rate at 68.9% for no-code fix generation.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The manually curated gold-standard benchmark accurately captures the distribution and labeling of invalid bug reports across real software projects and that the judge LLM evaluations align with human judgment of fix quality.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"RAG reaches 0.66 weighted F1 on invalid bug report subclassification while agentic web search reaches 68.9% judge success on no-code fix generation, using a new gold-standard benchmark.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Large language models with retrieval and agent techniques can subclassify root causes of invalid bug reports and generate no-code fixes.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"6f268288523bd1287478879f9faca4d6ce298799abd3033c50e61413449ba327"},"source":{"id":"2605.17561","kind":"arxiv","version":1},"verdict":{"id":"dfab004f-3cde-4d76-abe6-672b159f4517","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T22:21:35.727535Z","strongest_claim":"Retrieval augmented generation achieves the highest overall performance with 0.66 weighted F1 for subclassification of invalid bug reports, while agentic web search achieves the highest overall Judge LLM success rate at 68.9% for no-code fix generation.","one_line_summary":"RAG reaches 0.66 weighted F1 on invalid bug report subclassification while agentic web search reaches 68.9% judge success on no-code fix generation, using a new gold-standard benchmark.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The manually curated gold-standard benchmark accurately captures the distribution and labeling of invalid bug reports across real software projects and that the judge LLM evaluations align with human judgment of fix quality.","pith_extraction_headline":"Large language models with retrieval and agent techniques can subclassify root causes of invalid bug reports and generate no-code fixes."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17561/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T22:31:19.573717Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T22:31:06.458360Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.601114Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T21:21:57.533761Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"9aeca47f3bd7c6af25cc1dc66be10b019b979bafd70f83baccdd914dcd272989"},"references":{"count":61,"sample":[{"doi":"","year":2022,"title":"The cost of poor software quality in the us: A 2022 report,","work_id":"8a60f133-ab66-4a06-a500-83011a1c2679","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"(2025) Jira software: Issue and project tracking tool","work_id":"eee60a76-42ec-449c-8501-dda4a186e172","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"(2025) Github issues: Collaborative issue tracking platform","work_id":"a8c65ce2-6412-49bb-8f9b-4e501e1dab5c","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Chaff from the wheat: Characterizing and determining valid bug reports,","work_id":"df350316-0416-4415-a1aa-3d159b45132a","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"A data-driven approach for understanding invalid bug reports: An industrial case study,","work_id":"20240247-ae02-4c21-9798-233519276a6e","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":61,"snapshot_sha256":"aa0eb68b13ef949c7a3dbb1c160bfdb754c457514d2e208fce7569083efd7e6f","internal_anchors":7},"formal_canon":{"evidence_count":2,"snapshot_sha256":"2f7f3f75b5d0868413fd153376578261c1bb481969ce020d4665a9e0f0aae059"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.17561","created_at":"2026-05-20T00:04:46.004632+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.17561v1","created_at":"2026-05-20T00:04:46.004632+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17561","created_at":"2026-05-20T00:04:46.004632+00:00"},{"alias_kind":"pith_short_12","alias_value":"M5LOKMGP6FHG","created_at":"2026-05-20T00:04:46.004632+00:00"},{"alias_kind":"pith_short_16","alias_value":"M5LOKMGP6FHGYAWO","created_at":"2026-05-20T00:04:46.004632+00:00"},{"alias_kind":"pith_short_8","alias_value":"M5LOKMGP","created_at":"2026-05-20T00:04:46.004632+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS","json":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS.json","graph_json":"https://pith.science/api/pith-number/M5LOKMGP6FHGYAWOSX23YLHQTS/graph.json","events_json":"https://pith.science/api/pith-number/M5LOKMGP6FHGYAWOSX23YLHQTS/events.json","paper":"https://pith.science/paper/M5LOKMGP"},"agent_actions":{"view_html":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS","download_json":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS.json","view_paper":"https://pith.science/paper/M5LOKMGP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.17561&json=true","fetch_graph":"https://pith.science/api/pith-number/M5LOKMGP6FHGYAWOSX23YLHQTS/graph.json","fetch_events":"https://pith.science/api/pith-number/M5LOKMGP6FHGYAWOSX23YLHQTS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS/action/storage_attestation","attest_author":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS/action/author_attestation","sign_citation":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS/action/citation_signature","submit_replication":"https://pith.science/pith/M5LOKMGP6FHGYAWOSX23YLHQTS/action/replication_record"}},"created_at":"2026-05-20T00:04:46.004632+00:00","updated_at":"2026-05-20T00:04:46.004632+00:00"}