{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:FPJIHIJKYXM2T7FECBVCEI2NXG","short_pith_number":"pith:FPJIHIJK","schema_version":"1.0","canonical_sha256":"2bd283a12ac5d9a9fca4106a22234db98cfdb9f2c082b0574f71a469333f5381","source":{"kind":"arxiv","id":"2605.16999","version":1},"attestation_state":"computed","paper":{"title":"Ranking-Aware Calibration for Reliable Multimodal Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Ranking signals from group-based RL can supervise confidence to improve calibration in vision-language models.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Boyao Yang, Jun Zhu, Peng Cui","submitted_at":"2026-05-16T13:51:29Z","abstract_excerpt":"Reinforcement learning post-training has substantially improved the reasoning accuracy of vision-language models, yet the resulting policies remain poorly calibrated. Terminal correctness rewards provide no gradient that penalizes confident errors more than uncertain ones and no signal that ties confidence to the quality of visual evidence, a gap that becomes especially severe under corrupted or ambiguous inputs where models continue to report high confidence on incorrect answers. We introduce Ranking-Aware Calibration (RAC), a training-time framework that supervises confidence using two compa"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.16999","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-16T13:51:29Z","cross_cats_sorted":[],"title_canon_sha256":"b2feb1e35fc4fa7b0e1e2be760927bb4c072e5a54fd9b4284f5a4543a178af59","abstract_canon_sha256":"eb0e4e4bdacd9da5a7a856c499f1db0b6bd86661fdeef57d58e75cf673cff972"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:35.255130Z","signature_b64":"PraB+pFnVuDUyt0WQQVhiuqhHzub0ZkNPEipV3QAS2WdUi3KNF9Gld4Z0ODWR366Fz8Vq8WN2DpvDIw7yTaIBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2bd283a12ac5d9a9fca4106a22234db98cfdb9f2c082b0574f71a469333f5381","last_reissued_at":"2026-05-20T00:03:35.254348Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:35.254348Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Ranking-Aware Calibration for Reliable Multimodal Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Ranking signals from group-based RL can supervise confidence to improve calibration in vision-language models.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Boyao Yang, Jun Zhu, Peng Cui","submitted_at":"2026-05-16T13:51:29Z","abstract_excerpt":"Reinforcement learning post-training has substantially improved the reasoning accuracy of vision-language models, yet the resulting policies remain poorly calibrated. Terminal correctness rewards provide no gradient that penalizes confident errors more than uncertain ones and no signal that ties confidence to the quality of visual evidence, a gap that becomes especially severe under corrupted or ambiguous inputs where models continue to report high confidence on incorrect answers. We introduce Ranking-Aware Calibration (RAC), a training-time framework that supervises confidence using two compa"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Their combination achieves the best calibration across all tested backbones while improving accuracy in the majority of settings.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the ranking signals already produced by group-based RL directly reflect reasoning quality and can be used to supervise confidence without introducing new biases or requiring validation against external correctness measures.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"RAC adds ranking-aware group loss and clean-corrupted pairwise loss to RL post-training to boost both accuracy and calibration in multimodal reasoning without extra annotations.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Ranking signals from group-based RL can supervise confidence to improve calibration in vision-language models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"c2e96fc2951359497fc4f138ca769746bb3f93f45d2c594aa7136abb2221b254"},"source":{"id":"2605.16999","kind":"arxiv","version":1},"verdict":{"id":"2db4e29c-c830-42b3-9114-364f9764d644","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T20:49:17.718708Z","strongest_claim":"Their combination achieves the best calibration across all tested backbones while improving accuracy in the majority of settings.","one_line_summary":"RAC adds ranking-aware group loss and clean-corrupted pairwise loss to RL post-training to boost both accuracy and calibration in multimodal reasoning without extra annotations.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the ranking signals already produced by group-based RL directly reflect reasoning quality and can be used to supervise confidence without introducing new biases or requiring validation against external correctness measures.","pith_extraction_headline":"Ranking signals from group-based RL can supervise confidence to improve calibration in vision-language models."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16999/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T21:01:19.044903Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T21:00:45.448335Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"citation_quote_validity","ran_at":"2026-05-19T19:49:57.218286Z","status":"skipped","version":"0.1.0","findings_count":0},{"name":"cited_work_retraction","ran_at":"2026-05-19T19:23:35.806832Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T18:41:56.200234Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.289880Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"3e5ae055a5305db2c1e6fd7cea3fe39c97a8093af7cdea9fb64b58f4b238db60"},"references":{"count":52,"sample":[{"doi":"","year":2021,"title":"Learning transferable visual models from natural language supervision","work_id":"e458692a-52fd-466e-8198-611f2e9483ff","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Flamingo: a visual language model for few-shot learning","work_id":"2db6d170-2eba-4c48-8186-8ae2d9658769","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models","work_id":"2082ea81-8502-4fd3-97e6-b01d8c42d587","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Visual instruction tuning","work_id":"04e50b1e-f80b-4c85-ba54-45d6b4b19109","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond","work_id":"b5c6eee0-36d1-4d4a-8258-f4ef8294b341","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":52,"snapshot_sha256":"ca4e23006b41590226eab2fdf6c29a2ef7389b262fe92b3cb1c951d2fcba7cd0","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.16999","created_at":"2026-05-20T00:03:35.254493+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.16999v1","created_at":"2026-05-20T00:03:35.254493+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16999","created_at":"2026-05-20T00:03:35.254493+00:00"},{"alias_kind":"pith_short_12","alias_value":"FPJIHIJKYXM2","created_at":"2026-05-20T00:03:35.254493+00:00"},{"alias_kind":"pith_short_16","alias_value":"FPJIHIJKYXM2T7FE","created_at":"2026-05-20T00:03:35.254493+00:00"},{"alias_kind":"pith_short_8","alias_value":"FPJIHIJK","created_at":"2026-05-20T00:03:35.254493+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG","json":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG.json","graph_json":"https://pith.science/api/pith-number/FPJIHIJKYXM2T7FECBVCEI2NXG/graph.json","events_json":"https://pith.science/api/pith-number/FPJIHIJKYXM2T7FECBVCEI2NXG/events.json","paper":"https://pith.science/paper/FPJIHIJK"},"agent_actions":{"view_html":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG","download_json":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG.json","view_paper":"https://pith.science/paper/FPJIHIJK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.16999&json=true","fetch_graph":"https://pith.science/api/pith-number/FPJIHIJKYXM2T7FECBVCEI2NXG/graph.json","fetch_events":"https://pith.science/api/pith-number/FPJIHIJKYXM2T7FECBVCEI2NXG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG/action/storage_attestation","attest_author":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG/action/author_attestation","sign_citation":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG/action/citation_signature","submit_replication":"https://pith.science/pith/FPJIHIJKYXM2T7FECBVCEI2NXG/action/replication_record"}},"created_at":"2026-05-20T00:03:35.254493+00:00","updated_at":"2026-05-20T00:03:35.254493+00:00"}