{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:DY7URUL7RBCNGYJR53GDZ4QGVG","short_pith_number":"pith:DY7URUL7","schema_version":"1.0","canonical_sha256":"1e3f48d17f8844d36131eecc3cf206a9b6e76e99d71e25fed94fcbceb0855cf1","source":{"kind":"arxiv","id":"2605.28876","version":1},"attestation_state":"computed","paper":{"title":"LogDx-CI: Benchmarking Log Reduction Tools for LLM Root-Cause Diagnosis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SE","authors_text":"Bowen Qin","submitted_at":"2026-05-26T06:34:47Z","abstract_excerpt":"CI failure logs are large (median 5k lines, max 200k in this corpus) and noisy. Coding agents that try to debug them depend on an upstream tool to reduce the log to a manageable context, but the field has had no public empirical comparison of which reductions preserve enough evidence for downstream LLM diagnosis. We introduce LogDx-CI, a benchmark that compares 11 context-reduction tools (raw, tail, grep, three RTK modes, two real LLM map-reduce summarizers, three hybrid routers) on 35 real GitHub Actions failure cases, scored by 3 LLM debugger families (Claude Haiku 4.5, Claude Sonnet 4.6, Op"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.28876","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-26T06:34:47Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a5857b37debd799fcf86caa3079277713df06732df055d9439e543d1c4656939","abstract_canon_sha256":"2aca489345b878a02346a31cffb0aa92eff708f929debd770a0b12088d9bd40f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T00:04:15.008464Z","signature_b64":"wJCLLb0h+yxGa7EyxcTpYqE9eGIqsE2/aRZQwnHaol2qDrbeBvT/OFNCTrniEDWVHR4arzyshSjeC9Sk5ygJDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1e3f48d17f8844d36131eecc3cf206a9b6e76e99d71e25fed94fcbceb0855cf1","last_reissued_at":"2026-05-29T00:04:15.007715Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T00:04:15.007715Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"LogDx-CI: Benchmarking Log Reduction Tools for LLM Root-Cause Diagnosis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SE","authors_text":"Bowen Qin","submitted_at":"2026-05-26T06:34:47Z","abstract_excerpt":"CI failure logs are large (median 5k lines, max 200k in this corpus) and noisy. Coding agents that try to debug them depend on an upstream tool to reduce the log to a manageable context, but the field has had no public empirical comparison of which reductions preserve enough evidence for downstream LLM diagnosis. We introduce LogDx-CI, a benchmark that compares 11 context-reduction tools (raw, tail, grep, three RTK modes, two real LLM map-reduce summarizers, three hybrid routers) on 35 real GitHub Actions failure cases, scored by 3 LLM debugger families (Claude Haiku 4.5, Claude Sonnet 4.6, Op"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.28876","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.28876/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.28876","created_at":"2026-05-29T00:04:15.007829+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.28876v1","created_at":"2026-05-29T00:04:15.007829+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.28876","created_at":"2026-05-29T00:04:15.007829+00:00"},{"alias_kind":"pith_short_12","alias_value":"DY7URUL7RBCN","created_at":"2026-05-29T00:04:15.007829+00:00"},{"alias_kind":"pith_short_16","alias_value":"DY7URUL7RBCNGYJR","created_at":"2026-05-29T00:04:15.007829+00:00"},{"alias_kind":"pith_short_8","alias_value":"DY7URUL7","created_at":"2026-05-29T00:04:15.007829+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG","json":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG.json","graph_json":"https://pith.science/api/pith-number/DY7URUL7RBCNGYJR53GDZ4QGVG/graph.json","events_json":"https://pith.science/api/pith-number/DY7URUL7RBCNGYJR53GDZ4QGVG/events.json","paper":"https://pith.science/paper/DY7URUL7"},"agent_actions":{"view_html":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG","download_json":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG.json","view_paper":"https://pith.science/paper/DY7URUL7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.28876&json=true","fetch_graph":"https://pith.science/api/pith-number/DY7URUL7RBCNGYJR53GDZ4QGVG/graph.json","fetch_events":"https://pith.science/api/pith-number/DY7URUL7RBCNGYJR53GDZ4QGVG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG/action/storage_attestation","attest_author":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG/action/author_attestation","sign_citation":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG/action/citation_signature","submit_replication":"https://pith.science/pith/DY7URUL7RBCNGYJR53GDZ4QGVG/action/replication_record"}},"created_at":"2026-05-29T00:04:15.007829+00:00","updated_at":"2026-05-29T00:04:15.007829+00:00"}