{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QEGTRZSKM7TPLSA53H77I25ZDT","short_pith_number":"pith:QEGTRZSK","canonical_record":{"source":{"id":"2604.18309","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-04-20T14:16:39Z","cross_cats_sorted":[],"title_canon_sha256":"cd7a2731dfd4788c8ba1214696a1ffbeff5d0e1a99fda2a2c02ccd7b0e4b98d8","abstract_canon_sha256":"896a0b51accb7ad6369fa5b7d6290a7ebfc7890c62a0e1bb4461c5c0e9330fb3"},"schema_version":"1.0"},"canonical_sha256":"810d38e64a67e6f5c81dd9fff46bb91cf94f839b823c465c08b967516ccff1b5","source":{"kind":"arxiv","id":"2604.18309","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.18309","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"arxiv_version","alias_value":"2604.18309v2","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.18309","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"pith_short_12","alias_value":"QEGTRZSKM7TP","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"pith_short_16","alias_value":"QEGTRZSKM7TPLSA5","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"pith_short_8","alias_value":"QEGTRZSK","created_at":"2026-05-21T01:05:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QEGTRZSKM7TPLSA53H77I25ZDT","target":"record","payload":{"canonical_record":{"source":{"id":"2604.18309","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-04-20T14:16:39Z","cross_cats_sorted":[],"title_canon_sha256":"cd7a2731dfd4788c8ba1214696a1ffbeff5d0e1a99fda2a2c02ccd7b0e4b98d8","abstract_canon_sha256":"896a0b51accb7ad6369fa5b7d6290a7ebfc7890c62a0e1bb4461c5c0e9330fb3"},"schema_version":"1.0"},"canonical_sha256":"810d38e64a67e6f5c81dd9fff46bb91cf94f839b823c465c08b967516ccff1b5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:19.331007Z","signature_b64":"UahQo+NVTyaXoXN+ZfzVo2sz1XaRdnCS9zxbCrm3LWybhW/wHH+WcYNjMyWHI2qawTp0RtCHUkszJ6IF4YMxCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"810d38e64a67e6f5c81dd9fff46bb91cf94f839b823c465c08b967516ccff1b5","last_reissued_at":"2026-05-21T01:05:19.330590Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:19.330590Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.18309","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QQmKDFAfY6VRwJFUPlLJz0MklGu1L7Bj0k1KNuk/6iLqeXd6cIIpLJ3nYfwaBiGYv1WUAI/9Vfe8NQb3OO63AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T04:54:25.383051Z"},"content_sha256":"91ccab3fee842e1cbb9016bdca296437fc131e52853863d7497d81ac93319b6b","schema_version":"1.0","event_id":"sha256:91ccab3fee842e1cbb9016bdca296437fc131e52853863d7497d81ac93319b6b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QEGTRZSKM7TPLSA53H77I25ZDT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"From Program Slices to Causal Clarity: Evaluating Faithful, Actionable LLM-Generated Failure Explanations via Context Partitioning and LLM-as-a-Judge","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Varying the composition of debugging context causally changes the quality of LLM-generated failure explanations, with targeted artifacts yielding better causal and actionable insights than large undifferentiated contexts.","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Christian Medeiros Adriano, Germany), Holger Giese (Hasso Plattner Institute, Julius Porbeck, University of Potsdam","submitted_at":"2026-04-20T14:16:39Z","abstract_excerpt":"Large language model (LLM)-based debugging systems can generate failure explanations, but these explanations may be incomplete or incorrect. Misleading explanations are harmful for downstream tasks (e.g., bug triage, bug fixing). We investigate how explanation quality is affected by various LLM context configurations. Existing work predominantly treats LLM-generated failure explanations as an ad hoc by-product of debugging or repair workflows, using generic prompting over undifferentiated artifacts such as code, tests, and error messages rather than targeting explanations as a first-class outp"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our results indicate that explanation quality is causally affected by context composition. Evidence-rich, failure-specific artifacts improve causal and action-oriented quality, whereas overly large contexts tend to yield vague explanations. Higher explanation-score quartiles are associated with higher downstream repair pass rates and, for some models, with fixes that are closer to the reference minimal fixes.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the six evaluation criteria and LLM-as-a-judge scores faithfully reflect true causal and actionable quality, and that the 93 context configurations plus the chosen real bugs are representative enough to support general claims about context effects.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Focused, failure-specific contexts such as program slices produce more causal and actionable LLM bug explanations than large undifferentiated contexts, and higher-quality explanations correlate with better downstream repair success rates.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Varying the composition of debugging context causally changes the quality of LLM-generated failure explanations, with targeted artifacts yielding better causal and actionable insights than large undifferentiated contexts.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a05c117bc4b3205c24b1f0179f5d85910c3e8d2e7cb7f705309e85d0dafe5b28"},"source":{"id":"2604.18309","kind":"arxiv","version":2},"verdict":{"id":"4d7d44e7-21c9-45e9-b059-193a2366edc3","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T04:27:26.058727Z","strongest_claim":"Our results indicate that explanation quality is causally affected by context composition. Evidence-rich, failure-specific artifacts improve causal and action-oriented quality, whereas overly large contexts tend to yield vague explanations. Higher explanation-score quartiles are associated with higher downstream repair pass rates and, for some models, with fixes that are closer to the reference minimal fixes.","one_line_summary":"Focused, failure-specific contexts such as program slices produce more causal and actionable LLM bug explanations than large undifferentiated contexts, and higher-quality explanations correlate with better downstream repair success rates.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the six evaluation criteria and LLM-as-a-judge scores faithfully reflect true causal and actionable quality, and that the 93 context configurations plus the chosen real bugs are representative enough to support general claims about context effects.","pith_extraction_headline":"Varying the composition of debugging context causally changes the quality of LLM-generated failure explanations, with targeted artifacts yielding better causal and actionable insights than large undifferentiated contexts."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.18309/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-20T04:11:47.108435Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"caaeec3a2a216b89840bf7adcb30f3bc483c9470c70c87389212f5404529c64c"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"4d7d44e7-21c9-45e9-b059-193a2366edc3"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"din4KsP7YmwS+4ME9ODCDsnzMR+tNetvHCQsPXU/KdNJ6dPr+6wKklaQZxx8+2qPgHGlTvghrnYsKMuT4sotCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T04:54:25.384041Z"},"content_sha256":"29ace426fa52dfdb4325df710e67e1a448f51c6bc10a631de8d5e0fab4f60ad4","schema_version":"1.0","event_id":"sha256:29ace426fa52dfdb4325df710e67e1a448f51c6bc10a631de8d5e0fab4f60ad4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QEGTRZSKM7TPLSA53H77I25ZDT/bundle.json","state_url":"https://pith.science/pith/QEGTRZSKM7TPLSA53H77I25ZDT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QEGTRZSKM7TPLSA53H77I25ZDT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T04:54:25Z","links":{"resolver":"https://pith.science/pith/QEGTRZSKM7TPLSA53H77I25ZDT","bundle":"https://pith.science/pith/QEGTRZSKM7TPLSA53H77I25ZDT/bundle.json","state":"https://pith.science/pith/QEGTRZSKM7TPLSA53H77I25ZDT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QEGTRZSKM7TPLSA53H77I25ZDT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QEGTRZSKM7TPLSA53H77I25ZDT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"896a0b51accb7ad6369fa5b7d6290a7ebfc7890c62a0e1bb4461c5c0e9330fb3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-04-20T14:16:39Z","title_canon_sha256":"cd7a2731dfd4788c8ba1214696a1ffbeff5d0e1a99fda2a2c02ccd7b0e4b98d8"},"schema_version":"1.0","source":{"id":"2604.18309","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.18309","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"arxiv_version","alias_value":"2604.18309v2","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.18309","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"pith_short_12","alias_value":"QEGTRZSKM7TP","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"pith_short_16","alias_value":"QEGTRZSKM7TPLSA5","created_at":"2026-05-21T01:05:19Z"},{"alias_kind":"pith_short_8","alias_value":"QEGTRZSK","created_at":"2026-05-21T01:05:19Z"}],"graph_snapshots":[{"event_id":"sha256:29ace426fa52dfdb4325df710e67e1a448f51c6bc10a631de8d5e0fab4f60ad4","target":"graph","created_at":"2026-05-21T01:05:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our results indicate that explanation quality is causally affected by context composition. Evidence-rich, failure-specific artifacts improve causal and action-oriented quality, whereas overly large contexts tend to yield vague explanations. Higher explanation-score quartiles are associated with higher downstream repair pass rates and, for some models, with fixes that are closer to the reference minimal fixes."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the six evaluation criteria and LLM-as-a-judge scores faithfully reflect true causal and actionable quality, and that the 93 context configurations plus the chosen real bugs are representative enough to support general claims about context effects."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Focused, failure-specific contexts such as program slices produce more causal and actionable LLM bug explanations than large undifferentiated contexts, and higher-quality explanations correlate with better downstream repair success rates."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Varying the composition of debugging context causally changes the quality of LLM-generated failure explanations, with targeted artifacts yielding better causal and actionable insights than large undifferentiated contexts."}],"snapshot_sha256":"a05c117bc4b3205c24b1f0179f5d85910c3e8d2e7cb7f705309e85d0dafe5b28"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T04:11:47.108435Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.18309/integrity.json","findings":[],"snapshot_sha256":"caaeec3a2a216b89840bf7adcb30f3bc483c9470c70c87389212f5404529c64c","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language model (LLM)-based debugging systems can generate failure explanations, but these explanations may be incomplete or incorrect. Misleading explanations are harmful for downstream tasks (e.g., bug triage, bug fixing). We investigate how explanation quality is affected by various LLM context configurations. Existing work predominantly treats LLM-generated failure explanations as an ad hoc by-product of debugging or repair workflows, using generic prompting over undifferentiated artifacts such as code, tests, and error messages rather than targeting explanations as a first-class outp","authors_text":"Christian Medeiros Adriano, Germany), Holger Giese (Hasso Plattner Institute, Julius Porbeck, University of Potsdam","cross_cats":[],"headline":"Varying the composition of debugging context causally changes the quality of LLM-generated failure explanations, with targeted artifacts yielding better causal and actionable insights than large undifferentiated contexts.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-04-20T14:16:39Z","title":"From Program Slices to Causal Clarity: Evaluating Faithful, Actionable LLM-Generated Failure Explanations via Context Partitioning and LLM-as-a-Judge"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.18309","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T04:27:26.058727Z","id":"4d7d44e7-21c9-45e9-b059-193a2366edc3","model_set":{"reader":"grok-4.3"},"one_line_summary":"Focused, failure-specific contexts such as program slices produce more causal and actionable LLM bug explanations than large undifferentiated contexts, and higher-quality explanations correlate with better downstream repair success rates.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Varying the composition of debugging context causally changes the quality of LLM-generated failure explanations, with targeted artifacts yielding better causal and actionable insights than large undifferentiated contexts.","strongest_claim":"Our results indicate that explanation quality is causally affected by context composition. Evidence-rich, failure-specific artifacts improve causal and action-oriented quality, whereas overly large contexts tend to yield vague explanations. Higher explanation-score quartiles are associated with higher downstream repair pass rates and, for some models, with fixes that are closer to the reference minimal fixes.","weakest_assumption":"That the six evaluation criteria and LLM-as-a-judge scores faithfully reflect true causal and actionable quality, and that the 93 context configurations plus the chosen real bugs are representative enough to support general claims about context effects."}},"verdict_id":"4d7d44e7-21c9-45e9-b059-193a2366edc3"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:91ccab3fee842e1cbb9016bdca296437fc131e52853863d7497d81ac93319b6b","target":"record","created_at":"2026-05-21T01:05:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"896a0b51accb7ad6369fa5b7d6290a7ebfc7890c62a0e1bb4461c5c0e9330fb3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-04-20T14:16:39Z","title_canon_sha256":"cd7a2731dfd4788c8ba1214696a1ffbeff5d0e1a99fda2a2c02ccd7b0e4b98d8"},"schema_version":"1.0","source":{"id":"2604.18309","kind":"arxiv","version":2}},"canonical_sha256":"810d38e64a67e6f5c81dd9fff46bb91cf94f839b823c465c08b967516ccff1b5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"810d38e64a67e6f5c81dd9fff46bb91cf94f839b823c465c08b967516ccff1b5","first_computed_at":"2026-05-21T01:05:19.330590Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:05:19.330590Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UahQo+NVTyaXoXN+ZfzVo2sz1XaRdnCS9zxbCrm3LWybhW/wHH+WcYNjMyWHI2qawTp0RtCHUkszJ6IF4YMxCw==","signature_status":"signed_v1","signed_at":"2026-05-21T01:05:19.331007Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.18309","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:91ccab3fee842e1cbb9016bdca296437fc131e52853863d7497d81ac93319b6b","sha256:29ace426fa52dfdb4325df710e67e1a448f51c6bc10a631de8d5e0fab4f60ad4"],"state_sha256":"d458b1e8bc7d451a7da618391667eb31a6e26c63c6b115cb371a8fa211c13a85"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tNNlhAngdvujavbtrAhXtFyuMiUIRqhbvnwWDQ3/ZeMiB/EXGuKPE9Dl/Lgq7OYZ0klbBQ2pt4f3sj0v0g6VBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T04:54:25.388589Z","bundle_sha256":"2cba1af251c7de4926d614fa990c26d10c17b5a641257202095e8e4979754302"}}