{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3ZEIAIJD2ABDWSB2MEOT7ATXRM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9baf325eb31d34993be7db5388cafdd71fe9750b6c44d134c9200044997abdd2","cross_cats_sorted":["cs.LG","stat.CO","stat.ME"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-06-17T13:26:04Z","title_canon_sha256":"fe5d75c5739875cebff605a4915f50b82f7ec5905d222187813c8012b7767779"},"schema_version":"1.0","source":{"id":"2606.19057","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19057","created_at":"2026-06-19T16:11:55Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19057v1","created_at":"2026-06-19T16:11:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19057","created_at":"2026-06-19T16:11:55Z"},{"alias_kind":"pith_short_12","alias_value":"3ZEIAIJD2ABD","created_at":"2026-06-19T16:11:55Z"},{"alias_kind":"pith_short_16","alias_value":"3ZEIAIJD2ABDWSB2","created_at":"2026-06-19T16:11:55Z"},{"alias_kind":"pith_short_8","alias_value":"3ZEIAIJD","created_at":"2026-06-19T16:11:55Z"}],"graph_snapshots":[{"event_id":"sha256:fc80b999dc1fb704b8f82ca93aa29063f52dc29037281633c7eb48ad95e88993","target":"graph","created_at":"2026-06-19T16:11:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.19057/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large Language Models (LLMs) are increasingly used as judges for scalable evaluation, yet such LLM--as--a--Judge systems exhibit systematic biases that are decoupled from semantic quality, most notably verbosity bias. Meanwhile, human supervision is costly and typically selective, yielding reliable positive judgments but leaving most outputs unlabelled and potentially mixed in quality. We formulate LLM evaluation under selective human supervision as a positive--unlabelled learning problem and propose a geometric auditing framework based on Partial Optimal Transport. By aligning a small set of ","authors_text":"Chi-Kuang Yeh, Lei Ding, Yi-Ting Hung, Zilong Zhang","cross_cats":["cs.LG","stat.CO","stat.ME"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-06-17T13:26:04Z","title":"Quantifying and Auditing LLM Evaluation via Positive--Unlabeled Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19057","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:32868a0a56afc1852a0ccdb9ca5dd2e6c629504407bc49c406ebd567d3c9b1af","target":"record","created_at":"2026-06-19T16:11:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9baf325eb31d34993be7db5388cafdd71fe9750b6c44d134c9200044997abdd2","cross_cats_sorted":["cs.LG","stat.CO","stat.ME"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-06-17T13:26:04Z","title_canon_sha256":"fe5d75c5739875cebff605a4915f50b82f7ec5905d222187813c8012b7767779"},"schema_version":"1.0","source":{"id":"2606.19057","kind":"arxiv","version":1}},"canonical_sha256":"de48802123d0023b483a611d3f82778b2b836064dfcdc5dd5c35ad4fb40bbbda","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"de48802123d0023b483a611d3f82778b2b836064dfcdc5dd5c35ad4fb40bbbda","first_computed_at":"2026-06-19T16:11:55.572268Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:11:55.572268Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ku7znei/FYN94V4VV7tSqUfh2xtluv2U5QV1rzmj50ayIJu3RUhdSuStM+XZVVQbCK1woxvRQucqOhHVyVxxCA==","signature_status":"signed_v1","signed_at":"2026-06-19T16:11:55.572693Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.19057","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:32868a0a56afc1852a0ccdb9ca5dd2e6c629504407bc49c406ebd567d3c9b1af","sha256:fc80b999dc1fb704b8f82ca93aa29063f52dc29037281633c7eb48ad95e88993"],"state_sha256":"e0b4867d3aed990c11df7185320bf471b528dab7368ba60f8931579b95d5fdde"}