{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:LPGBYPFPNXJILZMBASINP3VKWQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"259dcdf58a4d5c02c7cc7ce445c8aa92a7060229c092099cc51a419139e9a136","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","title_canon_sha256":"a19247bbc6a175d202706f23840a6bade7fb9c557df7ad8fac30d43d9c598266"},"schema_version":"1.0","source":{"id":"2605.18852","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18852","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18852v1","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18852","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_12","alias_value":"LPGBYPFPNXJI","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_16","alias_value":"LPGBYPFPNXJILZMB","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_8","alias_value":"LPGBYPFP","created_at":"2026-05-20T00:06:25Z"}],"graph_snapshots":[{"event_id":"sha256:982403fc82ccbe24223cf47cc609c7da44d97a40fd557c4e1b68e281279a419f","target":"graph","created_at":"2026-05-20T00:06:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.18852/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Checkpoint selection for multimodal large language models (MLLMs) presents significant challenges when performance differentials are marginal and evaluation signals are prone to noise. Existing methodologies rely heavily on static benchmarks or pointwise scoring, which frequently misalign with in-the-wild usage and lack robust uncertainty estimation, particularly in OCR-heavy scenarios. In this work, we formulate checkpoint selection as a robust decision problem under evaluation uncertainty. We propose a multi-stage framework that integrates curated real-world data, structured LLM-based judgme","authors_text":"Jessie Salas, Qinwu Xu, Zhuoheng Li","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","title":"Robust Checkpoint Selection for Multimodal LLMs via Agentic Evaluation and Stability-Aware Ranking"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18852","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:36e210fac85f223b1f78cac489f678c91af8a860c587d012a9d00a0be74d297b","target":"record","created_at":"2026-05-20T00:06:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"259dcdf58a4d5c02c7cc7ce445c8aa92a7060229c092099cc51a419139e9a136","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","title_canon_sha256":"a19247bbc6a175d202706f23840a6bade7fb9c557df7ad8fac30d43d9c598266"},"schema_version":"1.0","source":{"id":"2605.18852","kind":"arxiv","version":1}},"canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","first_computed_at":"2026-05-20T00:06:25.731514Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:06:25.731514Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qKl0GiVvGejDKG+CJBs/kchDJuWPzmlpbGaEL1sj6Jr6XNIv81HuZ8OxS3dfvYhjKtSzuSN45eU807Aejg7mDQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:06:25.732351Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.18852","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:36e210fac85f223b1f78cac489f678c91af8a860c587d012a9d00a0be74d297b","sha256:982403fc82ccbe24223cf47cc609c7da44d97a40fd557c4e1b68e281279a419f"],"state_sha256":"924309bf6e61cd9f15cb580afb11277a0c197c61fc6da984c7e39e6261109759"}