{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:HSJLAE76WQUPOVJQBDMNXOMLXS","short_pith_number":"pith:HSJLAE76","schema_version":"1.0","canonical_sha256":"3c92b013feb428f7553008d8dbb98bbc80827887e6b6e4e7bfaa2f2cd59b0360","source":{"kind":"arxiv","id":"2606.06324","version":1},"attestation_state":"computed","paper":{"title":"From Failed Trajectories to Reliable LLM Agents: Diagnosing and Repairing Harness Flaws","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.MA"],"primary_cat":"cs.SE","authors_text":"Junjie Wang, Mengzhuo Chen, Qing Wang, Yawen Wang, Zhe Liu","submitted_at":"2026-06-04T15:58:30Z","abstract_excerpt":"LLM-based agents increasingly rely on harnesses that provide execution environments, tool interfaces, context, lifecycle orchestration, observability, verification, and governance. Existing self-improving agents and automatic harness evolution methods mainly improve agents through runtime supervision, prompt optimization, workflow search, or harness modification based on final outcomes. However, they often fail to diagnose where the responsible evidence lies in failed trajectories and which harness layer causes the unreliable behavior, resulting in broad, indirect, or poorly scoped changes. Th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.06324","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-06-04T15:58:30Z","cross_cats_sorted":["cs.MA"],"title_canon_sha256":"4737d15e6b5e92adaf542e5f0968f55c34c8de3b304de9a0f510a16eb9c4d350","abstract_canon_sha256":"ca003c1f24770c4dfcc4a2cd386971446953a223432c693fae1663990d22295c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:41.951271Z","signature_b64":"YNl3HEbIg+FBGwVB82d3baS/0plHTxzQ6wtpDI158a/c22C03C1cWfdZFm5tTjBWzYHbs/ZsxNOJHFNTPSW9AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3c92b013feb428f7553008d8dbb98bbc80827887e6b6e4e7bfaa2f2cd59b0360","last_reissued_at":"2026-06-05T01:15:41.950855Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:41.950855Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"From Failed Trajectories to Reliable LLM Agents: Diagnosing and Repairing Harness Flaws","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.MA"],"primary_cat":"cs.SE","authors_text":"Junjie Wang, Mengzhuo Chen, Qing Wang, Yawen Wang, Zhe Liu","submitted_at":"2026-06-04T15:58:30Z","abstract_excerpt":"LLM-based agents increasingly rely on harnesses that provide execution environments, tool interfaces, context, lifecycle orchestration, observability, verification, and governance. Existing self-improving agents and automatic harness evolution methods mainly improve agents through runtime supervision, prompt optimization, workflow search, or harness modification based on final outcomes. However, they often fail to diagnose where the responsible evidence lies in failed trajectories and which harness layer causes the unreliable behavior, resulting in broad, indirect, or poorly scoped changes. Th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06324","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06324/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.06324","created_at":"2026-06-05T01:15:41.950918+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.06324v1","created_at":"2026-06-05T01:15:41.950918+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06324","created_at":"2026-06-05T01:15:41.950918+00:00"},{"alias_kind":"pith_short_12","alias_value":"HSJLAE76WQUP","created_at":"2026-06-05T01:15:41.950918+00:00"},{"alias_kind":"pith_short_16","alias_value":"HSJLAE76WQUPOVJQ","created_at":"2026-06-05T01:15:41.950918+00:00"},{"alias_kind":"pith_short_8","alias_value":"HSJLAE76","created_at":"2026-06-05T01:15:41.950918+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS","json":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS.json","graph_json":"https://pith.science/api/pith-number/HSJLAE76WQUPOVJQBDMNXOMLXS/graph.json","events_json":"https://pith.science/api/pith-number/HSJLAE76WQUPOVJQBDMNXOMLXS/events.json","paper":"https://pith.science/paper/HSJLAE76"},"agent_actions":{"view_html":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS","download_json":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS.json","view_paper":"https://pith.science/paper/HSJLAE76","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.06324&json=true","fetch_graph":"https://pith.science/api/pith-number/HSJLAE76WQUPOVJQBDMNXOMLXS/graph.json","fetch_events":"https://pith.science/api/pith-number/HSJLAE76WQUPOVJQBDMNXOMLXS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS/action/storage_attestation","attest_author":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS/action/author_attestation","sign_citation":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS/action/citation_signature","submit_replication":"https://pith.science/pith/HSJLAE76WQUPOVJQBDMNXOMLXS/action/replication_record"}},"created_at":"2026-06-05T01:15:41.950918+00:00","updated_at":"2026-06-05T01:15:41.950918+00:00"}