{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DFVOPTSXAES7NMNBPBF3TIDNLG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f2fec50ba69be354d83cc489ec84f02ec6cc45f2ae4d2448c9709b1a6465a88a","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-07-02T10:59:03Z","title_canon_sha256":"843406a31dc2adff7529fff32ba9f7b40a47fccd11dc94cfe5dbe5ebd8d4f8f1"},"schema_version":"1.0","source":{"id":"2607.02032","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.02032","created_at":"2026-07-03T01:17:37Z"},{"alias_kind":"arxiv_version","alias_value":"2607.02032v1","created_at":"2026-07-03T01:17:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.02032","created_at":"2026-07-03T01:17:37Z"},{"alias_kind":"pith_short_12","alias_value":"DFVOPTSXAES7","created_at":"2026-07-03T01:17:37Z"},{"alias_kind":"pith_short_16","alias_value":"DFVOPTSXAES7NMNB","created_at":"2026-07-03T01:17:37Z"},{"alias_kind":"pith_short_8","alias_value":"DFVOPTSX","created_at":"2026-07-03T01:17:37Z"}],"graph_snapshots":[{"event_id":"sha256:6b50a68f3a0f4d91025c2a946fd7e0b5e497ba436e5ce7c48ece699cdcc2156d","target":"graph","created_at":"2026-07-03T01:17:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.02032/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Evaluating LLM agents on benchmarks like SWE-Bench and GAIA can be expensive, time-consuming, and requires complex infrastructure. A single evaluation can cost thousands of dollars and take days to complete. In contrast, non-agentic LLM benchmarks that test individual capabilities (e.g., reasoning, code generation) are fast and cheap to run. In this paper, we investigate whether performance on expensive agentic benchmarks can be accurately predicted by the performance on a small, carefully selected subset of atomic evaluation instances. We introduce PACE, a framework that constructs proxy benc","authors_text":"Aditya Bharat Soni, Daniel Lee, Graham Neubig, Jiarui Liu, Jiayi Geng, Lindia Tjuatja, Lintang Sutawika, Vincent Lo, Xiang Yue, Yueqi Song, Yunze Xiao","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-07-02T10:59:03Z","title":"PACE: A Proxy for Agentic Capability Evaluation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.02032","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:43be4dd9c8903189b8822e09755201a3da5932aa085dc045390e855419fa6083","target":"record","created_at":"2026-07-03T01:17:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f2fec50ba69be354d83cc489ec84f02ec6cc45f2ae4d2448c9709b1a6465a88a","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-07-02T10:59:03Z","title_canon_sha256":"843406a31dc2adff7529fff32ba9f7b40a47fccd11dc94cfe5dbe5ebd8d4f8f1"},"schema_version":"1.0","source":{"id":"2607.02032","kind":"arxiv","version":1}},"canonical_sha256":"196ae7ce570125f6b1a1784bb9a06d5989c6c829045d0f0c3844e78a887f6859","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"196ae7ce570125f6b1a1784bb9a06d5989c6c829045d0f0c3844e78a887f6859","first_computed_at":"2026-07-03T01:17:37.604712Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T01:17:37.604712Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"XLpzbt6Y450xfMMfXyPuXWvDKSOXqruYSWqpPgZkV5fMyVRxjuaJZFRC8uKq8OpHEZaOLBf3ZCm3gT6Kx0d8CQ==","signature_status":"signed_v1","signed_at":"2026-07-03T01:17:37.605143Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.02032","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:43be4dd9c8903189b8822e09755201a3da5932aa085dc045390e855419fa6083","sha256:6b50a68f3a0f4d91025c2a946fd7e0b5e497ba436e5ce7c48ece699cdcc2156d"],"state_sha256":"2deb8b21a00027ab490c776bd7ad0e28943f09e184054c3f3ba8ab775bc0653e"}