{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CKIX5LFSNTFTUWUA4IXX2SWSSD","short_pith_number":"pith:CKIX5LFS","schema_version":"1.0","canonical_sha256":"12917eacb26ccb3a5a80e22f7d4ad290f4081e30c3948baa68d7e3a814e4eac7","source":{"kind":"arxiv","id":"2605.19276","version":1},"attestation_state":"computed","paper":{"title":"OpenCompass: A Universal Evaluation Platform for Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Chengqi Lyu, Dongsheng Zhu, Fengzhe Zhou, Ge Jiaye, Han Lyu, Haochen Ye, Haodong Duan, Hongwei Liu, Jingming Zhuo, Junnan Liu, Jun Xu, Kai Chen, Linchen Xiao, Lin Zhu, Maosong Cao, Mo Li, Ningsheng Ma, Peiheng Zhou, Songyang Zhang, Tong Gao, Yike Yuan, Yixiao Fang, Yuan Liu, Yufeng Zhao, Yu Sun, Zerun Ma, Zhaohui Yu, Zhiyong Wu","submitted_at":"2026-05-19T02:50:11Z","abstract_excerpt":"In recent years, the field of artificial intelligence has undergone a paradigm shift from task-specific small-scale models to general-purpose large language models (LLMs). With the rapid iteration of LLMs, objective, quantitative, and comprehensive evaluation of their capabilities has become a critical link in advancing technological development. Currently, the mainstream static benchmark dataset-based evaluation methods face challenges such as the diversity of task types, inconsistent evaluation criteria, and fragmentation of data and processing workflows, making it difficult to efficiently c"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19276","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T02:50:11Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"3b0be25df29cf8db55233222ba7521ef9ec628f25dcf5e7a4082c38f91a2c9ea","abstract_canon_sha256":"4893626a5b734c20183d3305ed870754dcdb9e5831d510427214feb917cb28f4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:37.281699Z","signature_b64":"wN3/wl0nNJiMZFSLCrGRjocnuBkyhNlzcarcOYfmZe572lmnoYZQ7NmLN7QVne2oQdtxmj9zFhh0EFj4iJbTBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"12917eacb26ccb3a5a80e22f7d4ad290f4081e30c3948baa68d7e3a814e4eac7","last_reissued_at":"2026-05-20T01:05:37.280925Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:37.280925Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"OpenCompass: A Universal Evaluation Platform for Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Chengqi Lyu, Dongsheng Zhu, Fengzhe Zhou, Ge Jiaye, Han Lyu, Haochen Ye, Haodong Duan, Hongwei Liu, Jingming Zhuo, Junnan Liu, Jun Xu, Kai Chen, Linchen Xiao, Lin Zhu, Maosong Cao, Mo Li, Ningsheng Ma, Peiheng Zhou, Songyang Zhang, Tong Gao, Yike Yuan, Yixiao Fang, Yuan Liu, Yufeng Zhao, Yu Sun, Zerun Ma, Zhaohui Yu, Zhiyong Wu","submitted_at":"2026-05-19T02:50:11Z","abstract_excerpt":"In recent years, the field of artificial intelligence has undergone a paradigm shift from task-specific small-scale models to general-purpose large language models (LLMs). With the rapid iteration of LLMs, objective, quantitative, and comprehensive evaluation of their capabilities has become a critical link in advancing technological development. Currently, the mainstream static benchmark dataset-based evaluation methods face challenges such as the diversity of task types, inconsistent evaluation criteria, and fragmentation of data and processing workflows, making it difficult to efficiently c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19276","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19276/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19276","created_at":"2026-05-20T01:05:37.281054+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19276v1","created_at":"2026-05-20T01:05:37.281054+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19276","created_at":"2026-05-20T01:05:37.281054+00:00"},{"alias_kind":"pith_short_12","alias_value":"CKIX5LFSNTFT","created_at":"2026-05-20T01:05:37.281054+00:00"},{"alias_kind":"pith_short_16","alias_value":"CKIX5LFSNTFTUWUA","created_at":"2026-05-20T01:05:37.281054+00:00"},{"alias_kind":"pith_short_8","alias_value":"CKIX5LFS","created_at":"2026-05-20T01:05:37.281054+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD","json":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD.json","graph_json":"https://pith.science/api/pith-number/CKIX5LFSNTFTUWUA4IXX2SWSSD/graph.json","events_json":"https://pith.science/api/pith-number/CKIX5LFSNTFTUWUA4IXX2SWSSD/events.json","paper":"https://pith.science/paper/CKIX5LFS"},"agent_actions":{"view_html":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD","download_json":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD.json","view_paper":"https://pith.science/paper/CKIX5LFS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19276&json=true","fetch_graph":"https://pith.science/api/pith-number/CKIX5LFSNTFTUWUA4IXX2SWSSD/graph.json","fetch_events":"https://pith.science/api/pith-number/CKIX5LFSNTFTUWUA4IXX2SWSSD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD/action/storage_attestation","attest_author":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD/action/author_attestation","sign_citation":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD/action/citation_signature","submit_replication":"https://pith.science/pith/CKIX5LFSNTFTUWUA4IXX2SWSSD/action/replication_record"}},"created_at":"2026-05-20T01:05:37.281054+00:00","updated_at":"2026-05-20T01:05:37.281054+00:00"}