{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:I35OYP2PYKDGZGPR3XQRTXAHFR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e8a7fe9ac2e1d7fb0e14dbfdeae3df1d442073585cedbd304926f961dd05d70e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T09:40:29Z","title_canon_sha256":"8f873706a21718744a971a78673733a553704f7d7829a7909736599eefd0df18"},"schema_version":"1.0","source":{"id":"2605.19597","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19597","created_at":"2026-05-20T01:05:53Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19597v1","created_at":"2026-05-20T01:05:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19597","created_at":"2026-05-20T01:05:53Z"},{"alias_kind":"pith_short_12","alias_value":"I35OYP2PYKDG","created_at":"2026-05-20T01:05:53Z"},{"alias_kind":"pith_short_16","alias_value":"I35OYP2PYKDGZGPR","created_at":"2026-05-20T01:05:53Z"},{"alias_kind":"pith_short_8","alias_value":"I35OYP2P","created_at":"2026-05-20T01:05:53Z"}],"graph_snapshots":[{"event_id":"sha256:0a6c199820171a5842aac392bf98a943010c9ac6364b6ae7183beeaa077b46f4","target":"graph","created_at":"2026-05-20T01:05:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.19597/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Evaluating large language models (LLMs) on natural-language logical reasoning is essential because rule-governed tasks require conclusions to follow strictly from stated premises. Many existing logical-reasoning benchmarks are generated by templating natural-language items from sampled formulas, provide only coarse or unaudited formal annotations, and are now quickly saturated by frontier reasoning models. We present LLMEval-Logic, a Chinese logical reasoning benchmark built from realistic situational scenarios. Its pipeline forward-authors and expert-audits natural-language items together wit","authors_text":"Junjie Ye, Kexin Tan, Maxm Pan, Ming Zhang, Qiyuan Peng, Qi Zhang, Ruizhi Yang, Shihan Dou, Tao Gui, Xuanjing Huang, Yinxi Wei, Yuhui Wang, Yujiong Shen, Zhangyue Yin, Zhenghao Xiang, Zhiheng Xi","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T09:40:29Z","title":"LLMEval-Logic: A Solver-Verified Chinese Benchmark for Logical Reasoning of LLMs with Adversarial Hardening"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19597","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e7b1c1094f33ee36bcad77d0142cdc744eb073d40c62043d1728541366921914","target":"record","created_at":"2026-05-20T01:05:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e8a7fe9ac2e1d7fb0e14dbfdeae3df1d442073585cedbd304926f961dd05d70e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T09:40:29Z","title_canon_sha256":"8f873706a21718744a971a78673733a553704f7d7829a7909736599eefd0df18"},"schema_version":"1.0","source":{"id":"2605.19597","kind":"arxiv","version":1}},"canonical_sha256":"46faec3f4fc2866c99f1dde119dc072c744576ef7b5a91bdf26845552217f44c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"46faec3f4fc2866c99f1dde119dc072c744576ef7b5a91bdf26845552217f44c","first_computed_at":"2026-05-20T01:05:53.626059Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:53.626059Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pT77/yqPWBskynOZiRnMb+l58Omyx+XgsCPy/7KL2SztOwVxVF/UwrTa7WqLCyCQEszNjK65JsEfcyV+3M1fBQ==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:53.626711Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.19597","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e7b1c1094f33ee36bcad77d0142cdc744eb073d40c62043d1728541366921914","sha256:0a6c199820171a5842aac392bf98a943010c9ac6364b6ae7183beeaa077b46f4"],"state_sha256":"92e8cf98b7e0cb18b8f4eb7e8d1d34ee4dee6ff01bc4e41f56a114161e787f15"}