{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:5IBZELKMHBYBXMTA2MI42DZWB3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2590f2eef32b16249f5231b209765d3df4c08412fcaf84f4aaf125b338c0b28b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2026-06-02T13:45:59Z","title_canon_sha256":"4e36c814705a5334a721d402b18b6b6451f795bcab17d581c3125f97ae8b2044"},"schema_version":"1.0","source":{"id":"2606.03656","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03656","created_at":"2026-06-03T01:06:03Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03656v1","created_at":"2026-06-03T01:06:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03656","created_at":"2026-06-03T01:06:03Z"},{"alias_kind":"pith_short_12","alias_value":"5IBZELKMHBYB","created_at":"2026-06-03T01:06:03Z"},{"alias_kind":"pith_short_16","alias_value":"5IBZELKMHBYBXMTA","created_at":"2026-06-03T01:06:03Z"},{"alias_kind":"pith_short_8","alias_value":"5IBZELKM","created_at":"2026-06-03T01:06:03Z"}],"graph_snapshots":[{"event_id":"sha256:1e82cab365dd8938109a06244b3d458472be5dc6f5c5134d5bbece8bac2bbc20","target":"graph","created_at":"2026-06-03T01:06:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.03656/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Prediction performance metrics such as accuracy and the F1 score are typically reported as single numbers, with no measure of uncertainty. The omission has been tolerable in exploratory settings, where model evaluation is used for informal comparison rather than formal decision-making. But as machine learning is deployed in real-world applications, evaluation results are increasingly used to support binary decisions -- whether a model meets a required standard or not -- making uncertainty quantification essential. The problem is compounded when data are dependent, as in repeated measurements, ","authors_text":"Daeyoung Lim, Taekwon Hong, Woojung Bae","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2026-06-02T13:45:59Z","title":"Beyond Point Estimates: Reliable Evaluation of Prediction Performance Metrics under Clustered Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03656","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:85d7cf823ddb826e92114ffd88e45d3b324de51176b4cd7ec17ef0e120f237f5","target":"record","created_at":"2026-06-03T01:06:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2590f2eef32b16249f5231b209765d3df4c08412fcaf84f4aaf125b338c0b28b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2026-06-02T13:45:59Z","title_canon_sha256":"4e36c814705a5334a721d402b18b6b6451f795bcab17d581c3125f97ae8b2044"},"schema_version":"1.0","source":{"id":"2606.03656","kind":"arxiv","version":1}},"canonical_sha256":"ea03922d4c38701bb260d311cd0f360eef5a2497ce8154a886e905b1b75f78c4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ea03922d4c38701bb260d311cd0f360eef5a2497ce8154a886e905b1b75f78c4","first_computed_at":"2026-06-03T01:06:03.674817Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T01:06:03.674817Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wZHz5YRyIcEqOiFmO/CPtcjWwnOoKAM4sfbB4hZgTD+XUn3IlCWGY5Qg3+aGSLy7F2yv7m4dZaLaCyPy0Ah7Cg==","signature_status":"signed_v1","signed_at":"2026-06-03T01:06:03.675218Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.03656","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:85d7cf823ddb826e92114ffd88e45d3b324de51176b4cd7ec17ef0e120f237f5","sha256:1e82cab365dd8938109a06244b3d458472be5dc6f5c5134d5bbece8bac2bbc20"],"state_sha256":"cb4e62a85d4208f2f93255708a07fed9239d81b0b4fe0407908b24dc1f433b14"}