{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PTDQKWDY24N6XSFINRDDKJQB73","short_pith_number":"pith:PTDQKWDY","canonical_record":{"source":{"id":"2605.27922","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:47:35Z","cross_cats_sorted":[],"title_canon_sha256":"f971a7d77333b1f29b7e0846a1db5d0a0ac7d929a88edb1d86b3a4fb61a35f16","abstract_canon_sha256":"1a0ddf704ad5f95969647389623f38cbb3a5b7e3d843e7da99d053e9765df20e"},"schema_version":"1.0"},"canonical_sha256":"7cc7055878d71bebc8a86c46352601feec53fb3c633f46f0a68611b97acaaade","source":{"kind":"arxiv","id":"2605.27922","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.27922","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"arxiv_version","alias_value":"2605.27922v1","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27922","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"pith_short_12","alias_value":"PTDQKWDY24N6","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"pith_short_16","alias_value":"PTDQKWDY24N6XSFI","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"pith_short_8","alias_value":"PTDQKWDY","created_at":"2026-05-28T01:04:52Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PTDQKWDY24N6XSFINRDDKJQB73","target":"record","payload":{"canonical_record":{"source":{"id":"2605.27922","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:47:35Z","cross_cats_sorted":[],"title_canon_sha256":"f971a7d77333b1f29b7e0846a1db5d0a0ac7d929a88edb1d86b3a4fb61a35f16","abstract_canon_sha256":"1a0ddf704ad5f95969647389623f38cbb3a5b7e3d843e7da99d053e9765df20e"},"schema_version":"1.0"},"canonical_sha256":"7cc7055878d71bebc8a86c46352601feec53fb3c633f46f0a68611b97acaaade","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:52.629372Z","signature_b64":"QvduSHUDxgMwJUQD/QbtJpCj2g5b2HIi9bZLmNNM4IJzpCoAD+X5hPMe/fgnRbQfAwvh5mTRJ4amF1U8BX79CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7cc7055878d71bebc8a86c46352601feec53fb3c633f46f0a68611b97acaaade","last_reissued_at":"2026-05-28T01:04:52.628980Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:52.628980Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.27922","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OaB21ZEjckyUXbUWFMBS7LaPYJjdKY4Bdvd8wGQtFOOhyzwf0+gNUJAL2wayh+tquSBoXHQ6oQOugDGCf2YDBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T05:31:34.945758Z"},"content_sha256":"5ee3cfa272ce06133db001b76eab2a25f4e6c19e346e97e61ebf762a96c709a7","schema_version":"1.0","event_id":"sha256:5ee3cfa272ce06133db001b76eab2a25f4e6c19e346e97e61ebf762a96c709a7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PTDQKWDY24N6XSFINRDDKJQB73","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Harness-Bench: Measuring Harness Effects across Models in Realistic Agent Workflows","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chao-Hsuan Liu, Guangxiang Zhao, Lin Sun, Tong Yang, Wenhan Yu, Xiangzheng Zhang, Xinyu Tan, Yaoming Li, Yilun Yao, Yuxuan Tian, Zhengyang Wang, Zhewen Tan","submitted_at":"2026-05-27T03:47:35Z","abstract_excerpt":"LLM agents are increasingly deployed as executable systems that use tools, modify workspaces, and produce concrete artifacts. In such workflows, performance depends not only on the base model, but also on the harness: the system layer that manages context, tools, state, constraints, permissions, tracing, and recovery. However, existing benchmarks typically abstract away execution, compare complete agent systems, or hold the harness fixed, making execution-layer variation difficult to study. We introduce Harness-Bench, a diagnostic benchmark for evaluating configuration-level harness effects in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27922","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27922/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xKsN78bgLH6VPCUNE7mZJkCPsThS2t7kTQrzt5tmsGDaKE4gEeHDqqBGUl/CjuykccuczTy00+phvYyAIEdFBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T05:31:34.946534Z"},"content_sha256":"5110787eff5523bf648dd7be1765fb147cb99271fdc987fd351a149ec8707ee7","schema_version":"1.0","event_id":"sha256:5110787eff5523bf648dd7be1765fb147cb99271fdc987fd351a149ec8707ee7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PTDQKWDY24N6XSFINRDDKJQB73/bundle.json","state_url":"https://pith.science/pith/PTDQKWDY24N6XSFINRDDKJQB73/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PTDQKWDY24N6XSFINRDDKJQB73/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T05:31:34Z","links":{"resolver":"https://pith.science/pith/PTDQKWDY24N6XSFINRDDKJQB73","bundle":"https://pith.science/pith/PTDQKWDY24N6XSFINRDDKJQB73/bundle.json","state":"https://pith.science/pith/PTDQKWDY24N6XSFINRDDKJQB73/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PTDQKWDY24N6XSFINRDDKJQB73/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PTDQKWDY24N6XSFINRDDKJQB73","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1a0ddf704ad5f95969647389623f38cbb3a5b7e3d843e7da99d053e9765df20e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:47:35Z","title_canon_sha256":"f971a7d77333b1f29b7e0846a1db5d0a0ac7d929a88edb1d86b3a4fb61a35f16"},"schema_version":"1.0","source":{"id":"2605.27922","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.27922","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"arxiv_version","alias_value":"2605.27922v1","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27922","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"pith_short_12","alias_value":"PTDQKWDY24N6","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"pith_short_16","alias_value":"PTDQKWDY24N6XSFI","created_at":"2026-05-28T01:04:52Z"},{"alias_kind":"pith_short_8","alias_value":"PTDQKWDY","created_at":"2026-05-28T01:04:52Z"}],"graph_snapshots":[{"event_id":"sha256:5110787eff5523bf648dd7be1765fb147cb99271fdc987fd351a149ec8707ee7","target":"graph","created_at":"2026-05-28T01:04:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.27922/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"LLM agents are increasingly deployed as executable systems that use tools, modify workspaces, and produce concrete artifacts. In such workflows, performance depends not only on the base model, but also on the harness: the system layer that manages context, tools, state, constraints, permissions, tracing, and recovery. However, existing benchmarks typically abstract away execution, compare complete agent systems, or hold the harness fixed, making execution-layer variation difficult to study. We introduce Harness-Bench, a diagnostic benchmark for evaluating configuration-level harness effects in","authors_text":"Chao-Hsuan Liu, Guangxiang Zhao, Lin Sun, Tong Yang, Wenhan Yu, Xiangzheng Zhang, Xinyu Tan, Yaoming Li, Yilun Yao, Yuxuan Tian, Zhengyang Wang, Zhewen Tan","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:47:35Z","title":"Harness-Bench: Measuring Harness Effects across Models in Realistic Agent Workflows"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27922","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ee3cfa272ce06133db001b76eab2a25f4e6c19e346e97e61ebf762a96c709a7","target":"record","created_at":"2026-05-28T01:04:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1a0ddf704ad5f95969647389623f38cbb3a5b7e3d843e7da99d053e9765df20e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:47:35Z","title_canon_sha256":"f971a7d77333b1f29b7e0846a1db5d0a0ac7d929a88edb1d86b3a4fb61a35f16"},"schema_version":"1.0","source":{"id":"2605.27922","kind":"arxiv","version":1}},"canonical_sha256":"7cc7055878d71bebc8a86c46352601feec53fb3c633f46f0a68611b97acaaade","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7cc7055878d71bebc8a86c46352601feec53fb3c633f46f0a68611b97acaaade","first_computed_at":"2026-05-28T01:04:52.628980Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:52.628980Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QvduSHUDxgMwJUQD/QbtJpCj2g5b2HIi9bZLmNNM4IJzpCoAD+X5hPMe/fgnRbQfAwvh5mTRJ4amF1U8BX79CQ==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:52.629372Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.27922","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5ee3cfa272ce06133db001b76eab2a25f4e6c19e346e97e61ebf762a96c709a7","sha256:5110787eff5523bf648dd7be1765fb147cb99271fdc987fd351a149ec8707ee7"],"state_sha256":"96e773be1bc82482ef0b3b8e1cd785477cc224abd16653f883d20ce9151be330"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"O0PhlZmi3otKrJvS5q2kj2cVwtMyxj7BU1vvcq9H7HJXOfbwxdC5bimrR8r5nnFqJ3on0F6FXpeGEuf0DRHyAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T05:31:34.950075Z","bundle_sha256":"874f382a9719a6024367116d6c27795aa60ca6359e98cba39182b97cf5bc9db0"}}