{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:74F2RSO44ECKUVSYKH73LXHHS6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e1a1fccac7654794ca49b53d079b3a57592172993f2d56723eae19aa3fe2b750","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","title_canon_sha256":"e94c1198a6786166645cd329ca1481baf022914ccc9dd1af279486c2c420ea33"},"schema_version":"1.0","source":{"id":"2605.27898","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.27898","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2605.27898v1","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27898","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"74F2RSO44ECK","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"74F2RSO44ECKUVSY","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"74F2RSO4","created_at":"2026-05-28T01:04:51Z"}],"graph_snapshots":[{"event_id":"sha256:b83d0354898eb0cdd241d58fe0e4ddf15b06552406f83f847e5b56c1aade2263","target":"graph","created_at":"2026-05-28T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.27898/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As LLMs are increasingly deployed as agents, reliable assessment of their agentic capabilities has become essential. However, reported benchmark scores often jointly reflect model capability and the implementation choices each benchmark is packaged with, making cross-benchmark results difficult to interpret as clean measurements of the underlying model. In this work, we present a unified framework for the fair evaluation of LLM agentic capabilities. Driven by a unified configuration system, the framework integrates diverse benchmarks into a standardized instruction--tool--environment format, e","authors_text":"Jing Shao, Jingyi Yang, Lijun Li, Li Sun, Pengyu Zhu, Qianxin Luo, Sen Su, Tingfeng Hui, Xinyu Yuan, Yaxing Lyu, Yi Liu","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","title":"A Unified Framework for the Evaluation of LLM Agentic Capabilities"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27898","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b907bc518ed68ee0008176bb0ec41644b79e82fcac942bd19ec90f1613ceb3d7","target":"record","created_at":"2026-05-28T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e1a1fccac7654794ca49b53d079b3a57592172993f2d56723eae19aa3fe2b750","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","title_canon_sha256":"e94c1198a6786166645cd329ca1481baf022914ccc9dd1af279486c2c420ea33"},"schema_version":"1.0","source":{"id":"2605.27898","kind":"arxiv","version":1}},"canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","first_computed_at":"2026-05-28T01:04:51.820154Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:51.820154Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Wd4iKhQBPYVr+32t0nlRe7BgS5W4YMvB6fadSkwLXv1c9i8VqCZ8WmjQsuCgoZH4qDIdHeamQZjTLsFZkoU4AA==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:51.820555Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.27898","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b907bc518ed68ee0008176bb0ec41644b79e82fcac942bd19ec90f1613ceb3d7","sha256:b83d0354898eb0cdd241d58fe0e4ddf15b06552406f83f847e5b56c1aade2263"],"state_sha256":"dd3529a1f1d82ffe8e8ce7b7b750c208cdca8962514babf1321ab19b7e0b7fc7"}