{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:74F2RSO44ECKUVSYKH73LXHHS6","short_pith_number":"pith:74F2RSO4","canonical_record":{"source":{"id":"2605.27898","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","cross_cats_sorted":[],"title_canon_sha256":"e94c1198a6786166645cd329ca1481baf022914ccc9dd1af279486c2c420ea33","abstract_canon_sha256":"e1a1fccac7654794ca49b53d079b3a57592172993f2d56723eae19aa3fe2b750"},"schema_version":"1.0"},"canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","source":{"kind":"arxiv","id":"2605.27898","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.27898","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2605.27898v1","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27898","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"74F2RSO44ECK","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"74F2RSO44ECKUVSY","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"74F2RSO4","created_at":"2026-05-28T01:04:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:74F2RSO44ECKUVSYKH73LXHHS6","target":"record","payload":{"canonical_record":{"source":{"id":"2605.27898","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","cross_cats_sorted":[],"title_canon_sha256":"e94c1198a6786166645cd329ca1481baf022914ccc9dd1af279486c2c420ea33","abstract_canon_sha256":"e1a1fccac7654794ca49b53d079b3a57592172993f2d56723eae19aa3fe2b750"},"schema_version":"1.0"},"canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:51.820555Z","signature_b64":"Wd4iKhQBPYVr+32t0nlRe7BgS5W4YMvB6fadSkwLXv1c9i8VqCZ8WmjQsuCgoZH4qDIdHeamQZjTLsFZkoU4AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","last_reissued_at":"2026-05-28T01:04:51.820154Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:51.820154Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.27898","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SSfsmZFgYH2+lvkyJoFGfsKaDPMXpQlhA0MRgAXuQ06LF2ragvf/3CYFAInvJguS0Q/QX2OHVSi1VfwzhDFoBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T04:10:43.240552Z"},"content_sha256":"b907bc518ed68ee0008176bb0ec41644b79e82fcac942bd19ec90f1613ceb3d7","schema_version":"1.0","event_id":"sha256:b907bc518ed68ee0008176bb0ec41644b79e82fcac942bd19ec90f1613ceb3d7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:74F2RSO44ECKUVSYKH73LXHHS6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Unified Framework for the Evaluation of LLM Agentic Capabilities","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Jing Shao, Jingyi Yang, Lijun Li, Li Sun, Pengyu Zhu, Qianxin Luo, Sen Su, Tingfeng Hui, Xinyu Yuan, Yaxing Lyu, Yi Liu","submitted_at":"2026-05-27T03:20:45Z","abstract_excerpt":"As LLMs are increasingly deployed as agents, reliable assessment of their agentic capabilities has become essential. However, reported benchmark scores often jointly reflect model capability and the implementation choices each benchmark is packaged with, making cross-benchmark results difficult to interpret as clean measurements of the underlying model. In this work, we present a unified framework for the fair evaluation of LLM agentic capabilities. Driven by a unified configuration system, the framework integrates diverse benchmarks into a standardized instruction--tool--environment format, e"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27898","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27898/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BO6tJhC60ckgZv62seRJJWg1Vz4oyv+nemqiGDl4/ADZQvk5uAdKm0kbFmq5WJwQZ3DopMCNzIjBRU1oXdkfDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T04:10:43.241242Z"},"content_sha256":"b83d0354898eb0cdd241d58fe0e4ddf15b06552406f83f847e5b56c1aade2263","schema_version":"1.0","event_id":"sha256:b83d0354898eb0cdd241d58fe0e4ddf15b06552406f83f847e5b56c1aade2263"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/74F2RSO44ECKUVSYKH73LXHHS6/bundle.json","state_url":"https://pith.science/pith/74F2RSO44ECKUVSYKH73LXHHS6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/74F2RSO44ECKUVSYKH73LXHHS6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T04:10:43Z","links":{"resolver":"https://pith.science/pith/74F2RSO44ECKUVSYKH73LXHHS6","bundle":"https://pith.science/pith/74F2RSO44ECKUVSYKH73LXHHS6/bundle.json","state":"https://pith.science/pith/74F2RSO44ECKUVSYKH73LXHHS6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/74F2RSO44ECKUVSYKH73LXHHS6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:74F2RSO44ECKUVSYKH73LXHHS6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e1a1fccac7654794ca49b53d079b3a57592172993f2d56723eae19aa3fe2b750","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","title_canon_sha256":"e94c1198a6786166645cd329ca1481baf022914ccc9dd1af279486c2c420ea33"},"schema_version":"1.0","source":{"id":"2605.27898","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.27898","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2605.27898v1","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27898","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"74F2RSO44ECK","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"74F2RSO44ECKUVSY","created_at":"2026-05-28T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"74F2RSO4","created_at":"2026-05-28T01:04:51Z"}],"graph_snapshots":[{"event_id":"sha256:b83d0354898eb0cdd241d58fe0e4ddf15b06552406f83f847e5b56c1aade2263","target":"graph","created_at":"2026-05-28T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.27898/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As LLMs are increasingly deployed as agents, reliable assessment of their agentic capabilities has become essential. However, reported benchmark scores often jointly reflect model capability and the implementation choices each benchmark is packaged with, making cross-benchmark results difficult to interpret as clean measurements of the underlying model. In this work, we present a unified framework for the fair evaluation of LLM agentic capabilities. Driven by a unified configuration system, the framework integrates diverse benchmarks into a standardized instruction--tool--environment format, e","authors_text":"Jing Shao, Jingyi Yang, Lijun Li, Li Sun, Pengyu Zhu, Qianxin Luo, Sen Su, Tingfeng Hui, Xinyu Yuan, Yaxing Lyu, Yi Liu","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","title":"A Unified Framework for the Evaluation of LLM Agentic Capabilities"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27898","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b907bc518ed68ee0008176bb0ec41644b79e82fcac942bd19ec90f1613ceb3d7","target":"record","created_at":"2026-05-28T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e1a1fccac7654794ca49b53d079b3a57592172993f2d56723eae19aa3fe2b750","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T03:20:45Z","title_canon_sha256":"e94c1198a6786166645cd329ca1481baf022914ccc9dd1af279486c2c420ea33"},"schema_version":"1.0","source":{"id":"2605.27898","kind":"arxiv","version":1}},"canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ff0ba8c9dce104aa565851ffb5dce797a005a06165327bc82ab84df49b1f0734","first_computed_at":"2026-05-28T01:04:51.820154Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:51.820154Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Wd4iKhQBPYVr+32t0nlRe7BgS5W4YMvB6fadSkwLXv1c9i8VqCZ8WmjQsuCgoZH4qDIdHeamQZjTLsFZkoU4AA==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:51.820555Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.27898","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b907bc518ed68ee0008176bb0ec41644b79e82fcac942bd19ec90f1613ceb3d7","sha256:b83d0354898eb0cdd241d58fe0e4ddf15b06552406f83f847e5b56c1aade2263"],"state_sha256":"dd3529a1f1d82ffe8e8ce7b7b750c208cdca8962514babf1321ab19b7e0b7fc7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aBN75Z5abb4eVBlQtBhZwuRgp0m+EgJUIHF9eGfjPZk6OSqmAycV+zizvjBb/uciQVtUyFKKb/W1qfly4wK1AQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T04:10:43.244714Z","bundle_sha256":"3ed0a14643591fff102f95877e8e7ab6302e9c4c718917d1c778c47eb8979a6b"}}