{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PCSAMLJCKMHOI6ZGREEVSYXS4Y","short_pith_number":"pith:PCSAMLJC","schema_version":"1.0","canonical_sha256":"78a4062d22530ee47b2689095962f2e6036f51b4730297c0b76c1fed02911bae","source":{"kind":"arxiv","id":"2605.24137","version":1},"attestation_state":"computed","paper":{"title":"Empirical Analysis and Detection of Hallucinations in LLM-Generated Bug Report Summaries","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SE","authors_text":"Abdallah Ayoub, Ahmad Abdel Latif, Gouri Ginde, Hinduja Nirujan, Shreyas Patil","submitted_at":"2026-05-22T18:55:46Z","abstract_excerpt":"Large Language Models (LLMs) are increasingly used to generate summaries of software bug reports, including sections such as Steps-to-Reproduce (S2R), Actual Behavior (AB), and Expected Behavior (EB). However, these models frequently produce hallucinations that can be convincing but unsupported by the source report. This can mislead developers and reduce trust in automated maintenance tools. Existing hallucination detection approaches typically evaluate outputs at the full-response level and do not consider the structure of technical documents. An initial exploratory study on 80 structured bug"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.24137","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-22T18:55:46Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"59b2457da22f9ab291ecffb45a3d3f7c0daab29305c2626ca2193cfc877113a6","abstract_canon_sha256":"ec8b3a3367021170371bb8bca9cb646843ff6f7be4191fe3b24a575c284df2c3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:02:48.182086Z","signature_b64":"tG/4txvxHGNQnYiutf9zVueUqerf6WoGYIFRqwhH87j2/6RaJPNdmSLLL7Ielmbcdh6FyfpZ/a7tIDcoqQ5cCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"78a4062d22530ee47b2689095962f2e6036f51b4730297c0b76c1fed02911bae","last_reissued_at":"2026-05-26T01:02:48.181207Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:02:48.181207Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Empirical Analysis and Detection of Hallucinations in LLM-Generated Bug Report Summaries","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SE","authors_text":"Abdallah Ayoub, Ahmad Abdel Latif, Gouri Ginde, Hinduja Nirujan, Shreyas Patil","submitted_at":"2026-05-22T18:55:46Z","abstract_excerpt":"Large Language Models (LLMs) are increasingly used to generate summaries of software bug reports, including sections such as Steps-to-Reproduce (S2R), Actual Behavior (AB), and Expected Behavior (EB). However, these models frequently produce hallucinations that can be convincing but unsupported by the source report. This can mislead developers and reduce trust in automated maintenance tools. Existing hallucination detection approaches typically evaluate outputs at the full-response level and do not consider the structure of technical documents. An initial exploratory study on 80 structured bug"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24137","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.24137/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.24137","created_at":"2026-05-26T01:02:48.181355+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.24137v1","created_at":"2026-05-26T01:02:48.181355+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24137","created_at":"2026-05-26T01:02:48.181355+00:00"},{"alias_kind":"pith_short_12","alias_value":"PCSAMLJCKMHO","created_at":"2026-05-26T01:02:48.181355+00:00"},{"alias_kind":"pith_short_16","alias_value":"PCSAMLJCKMHOI6ZG","created_at":"2026-05-26T01:02:48.181355+00:00"},{"alias_kind":"pith_short_8","alias_value":"PCSAMLJC","created_at":"2026-05-26T01:02:48.181355+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y","json":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y.json","graph_json":"https://pith.science/api/pith-number/PCSAMLJCKMHOI6ZGREEVSYXS4Y/graph.json","events_json":"https://pith.science/api/pith-number/PCSAMLJCKMHOI6ZGREEVSYXS4Y/events.json","paper":"https://pith.science/paper/PCSAMLJC"},"agent_actions":{"view_html":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y","download_json":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y.json","view_paper":"https://pith.science/paper/PCSAMLJC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.24137&json=true","fetch_graph":"https://pith.science/api/pith-number/PCSAMLJCKMHOI6ZGREEVSYXS4Y/graph.json","fetch_events":"https://pith.science/api/pith-number/PCSAMLJCKMHOI6ZGREEVSYXS4Y/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y/action/storage_attestation","attest_author":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y/action/author_attestation","sign_citation":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y/action/citation_signature","submit_replication":"https://pith.science/pith/PCSAMLJCKMHOI6ZGREEVSYXS4Y/action/replication_record"}},"created_at":"2026-05-26T01:02:48.181355+00:00","updated_at":"2026-05-26T01:02:48.181355+00:00"}