{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:LPGBYPFPNXJILZMBASINP3VKWQ","short_pith_number":"pith:LPGBYPFP","canonical_record":{"source":{"id":"2605.18852","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a19247bbc6a175d202706f23840a6bade7fb9c557df7ad8fac30d43d9c598266","abstract_canon_sha256":"259dcdf58a4d5c02c7cc7ce445c8aa92a7060229c092099cc51a419139e9a136"},"schema_version":"1.0"},"canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","source":{"kind":"arxiv","id":"2605.18852","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18852","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18852v1","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18852","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_12","alias_value":"LPGBYPFPNXJI","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_16","alias_value":"LPGBYPFPNXJILZMB","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_8","alias_value":"LPGBYPFP","created_at":"2026-05-20T00:06:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:LPGBYPFPNXJILZMBASINP3VKWQ","target":"record","payload":{"canonical_record":{"source":{"id":"2605.18852","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a19247bbc6a175d202706f23840a6bade7fb9c557df7ad8fac30d43d9c598266","abstract_canon_sha256":"259dcdf58a4d5c02c7cc7ce445c8aa92a7060229c092099cc51a419139e9a136"},"schema_version":"1.0"},"canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:06:25.732351Z","signature_b64":"qKl0GiVvGejDKG+CJBs/kchDJuWPzmlpbGaEL1sj6Jr6XNIv81HuZ8OxS3dfvYhjKtSzuSN45eU807Aejg7mDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","last_reissued_at":"2026-05-20T00:06:25.731514Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:06:25.731514Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.18852","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:06:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"meljzNqnJ5BcaSx7MqCoS+9ZOoIbLhmnNCQTEI3tLoPjh+V6geqzFOG+nokuz8pida2OTcNrEEBAYURAfpK9Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:33:03.397458Z"},"content_sha256":"36e210fac85f223b1f78cac489f678c91af8a860c587d012a9d00a0be74d297b","schema_version":"1.0","event_id":"sha256:36e210fac85f223b1f78cac489f678c91af8a860c587d012a9d00a0be74d297b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:LPGBYPFPNXJILZMBASINP3VKWQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Robust Checkpoint Selection for Multimodal LLMs via Agentic Evaluation and Stability-Aware Ranking","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Jessie Salas, Qinwu Xu, Zhuoheng Li","submitted_at":"2026-05-13T12:18:32Z","abstract_excerpt":"Checkpoint selection for multimodal large language models (MLLMs) presents significant challenges when performance differentials are marginal and evaluation signals are prone to noise. Existing methodologies rely heavily on static benchmarks or pointwise scoring, which frequently misalign with in-the-wild usage and lack robust uncertainty estimation, particularly in OCR-heavy scenarios. In this work, we formulate checkpoint selection as a robust decision problem under evaluation uncertainty. We propose a multi-stage framework that integrates curated real-world data, structured LLM-based judgme"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18852","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18852/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:06:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Rpcn4GZRRDu7+q/8tcmOrsWStG/Q97erTPqDyX//BQDKpc/29U5Ep4GjHzs/pzvPosh0TQSk21EqIvk0ZrtIAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:33:03.398039Z"},"content_sha256":"982403fc82ccbe24223cf47cc609c7da44d97a40fd557c4e1b68e281279a419f","schema_version":"1.0","event_id":"sha256:982403fc82ccbe24223cf47cc609c7da44d97a40fd557c4e1b68e281279a419f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LPGBYPFPNXJILZMBASINP3VKWQ/bundle.json","state_url":"https://pith.science/pith/LPGBYPFPNXJILZMBASINP3VKWQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LPGBYPFPNXJILZMBASINP3VKWQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T13:33:03Z","links":{"resolver":"https://pith.science/pith/LPGBYPFPNXJILZMBASINP3VKWQ","bundle":"https://pith.science/pith/LPGBYPFPNXJILZMBASINP3VKWQ/bundle.json","state":"https://pith.science/pith/LPGBYPFPNXJILZMBASINP3VKWQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LPGBYPFPNXJILZMBASINP3VKWQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:LPGBYPFPNXJILZMBASINP3VKWQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"259dcdf58a4d5c02c7cc7ce445c8aa92a7060229c092099cc51a419139e9a136","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","title_canon_sha256":"a19247bbc6a175d202706f23840a6bade7fb9c557df7ad8fac30d43d9c598266"},"schema_version":"1.0","source":{"id":"2605.18852","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18852","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18852v1","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18852","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_12","alias_value":"LPGBYPFPNXJI","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_16","alias_value":"LPGBYPFPNXJILZMB","created_at":"2026-05-20T00:06:25Z"},{"alias_kind":"pith_short_8","alias_value":"LPGBYPFP","created_at":"2026-05-20T00:06:25Z"}],"graph_snapshots":[{"event_id":"sha256:982403fc82ccbe24223cf47cc609c7da44d97a40fd557c4e1b68e281279a419f","target":"graph","created_at":"2026-05-20T00:06:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.18852/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Checkpoint selection for multimodal large language models (MLLMs) presents significant challenges when performance differentials are marginal and evaluation signals are prone to noise. Existing methodologies rely heavily on static benchmarks or pointwise scoring, which frequently misalign with in-the-wild usage and lack robust uncertainty estimation, particularly in OCR-heavy scenarios. In this work, we formulate checkpoint selection as a robust decision problem under evaluation uncertainty. We propose a multi-stage framework that integrates curated real-world data, structured LLM-based judgme","authors_text":"Jessie Salas, Qinwu Xu, Zhuoheng Li","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","title":"Robust Checkpoint Selection for Multimodal LLMs via Agentic Evaluation and Stability-Aware Ranking"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18852","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:36e210fac85f223b1f78cac489f678c91af8a860c587d012a9d00a0be74d297b","target":"record","created_at":"2026-05-20T00:06:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"259dcdf58a4d5c02c7cc7ce445c8aa92a7060229c092099cc51a419139e9a136","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T12:18:32Z","title_canon_sha256":"a19247bbc6a175d202706f23840a6bade7fb9c557df7ad8fac30d43d9c598266"},"schema_version":"1.0","source":{"id":"2605.18852","kind":"arxiv","version":1}},"canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5bcc1c3caf6dd285e5810490d7eeaab40475044855cf8354d2bb165fafda1aef","first_computed_at":"2026-05-20T00:06:25.731514Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:06:25.731514Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qKl0GiVvGejDKG+CJBs/kchDJuWPzmlpbGaEL1sj6Jr6XNIv81HuZ8OxS3dfvYhjKtSzuSN45eU807Aejg7mDQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:06:25.732351Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.18852","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:36e210fac85f223b1f78cac489f678c91af8a860c587d012a9d00a0be74d297b","sha256:982403fc82ccbe24223cf47cc609c7da44d97a40fd557c4e1b68e281279a419f"],"state_sha256":"924309bf6e61cd9f15cb580afb11277a0c197c61fc6da984c7e39e6261109759"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aZkjZYa/zWy0mBpxy7jcY4uActT4UCaAw8g5g2ISRym6GuYMyCJ5qPp30HF1XBiDo7pwJ6IVX+MUwOxrklieBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T13:33:03.401302Z","bundle_sha256":"a3a4d68d0d98237517438f0e007012c58685d95ff9c61fe0be4034cb7ff5d387"}}