{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:X2JCNQCIFEJGFQURVA4LLQQ6YW","short_pith_number":"pith:X2JCNQCI","schema_version":"1.0","canonical_sha256":"be9226c048291262c291a838b5c21ec586a5b24f7cbf3d3b4ad86cfaa3ea426c","source":{"kind":"arxiv","id":"2604.09349","version":2},"attestation_state":"computed","paper":{"title":"Visually-Guided Policy Optimization for Multimodal Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Feng Xiong, Liang Lin, Man Zhang, Xiangxiang Chu, Xuecai Hu, Yanlin Wang, Yong Wang, Zengbin Wang","submitted_at":"2026-04-10T14:22:38Z","abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has significantly advanced the reasoning ability of vision-language models (VLMs). However, the inherent text-dominated nature of VLMs often leads to insufficient visual faithfulness, characterized by sparse attention activation to visual tokens. More importantly, our empirical analysis reveals that temporal visual forgetting along reasoning steps exacerbates this deficiency. To bridge this gap, we propose Visually-Guided Policy Optimization (VGPO), a novel framework to reinforce visual focus during policy optimization. Specifically, VGPO i"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":true},"canonical_record":{"source":{"id":"2604.09349","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-04-10T14:22:38Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"3de2e68d2591c2570c9a16df68c2550fd8b776ec39c042e4a3a0a72c534f99bb","abstract_canon_sha256":"04f0381805e71de659352f26fb10081fd34aa91146bd3d67d7fe419aaf9bfcc0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:19.336275Z","signature_b64":"oQTDq2zzTm11wImn0SwdnQ8edAjouIjMgE82NxPOxXs0NLsNB9Fton5JQeS71M+OZ5vOHOCWrpqYaM/QxPZCBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"be9226c048291262c291a838b5c21ec586a5b24f7cbf3d3b4ad86cfaa3ea426c","last_reissued_at":"2026-05-25T02:01:19.335357Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:19.335357Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Visually-Guided Policy Optimization for Multimodal Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Feng Xiong, Liang Lin, Man Zhang, Xiangxiang Chu, Xuecai Hu, Yanlin Wang, Yong Wang, Zengbin Wang","submitted_at":"2026-04-10T14:22:38Z","abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has significantly advanced the reasoning ability of vision-language models (VLMs). However, the inherent text-dominated nature of VLMs often leads to insufficient visual faithfulness, characterized by sparse attention activation to visual tokens. More importantly, our empirical analysis reveals that temporal visual forgetting along reasoning steps exacerbates this deficiency. To bridge this gap, we propose Visually-Guided Policy Optimization (VGPO), a novel framework to reinforce visual focus during policy optimization. Specifically, VGPO i"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"VGPO achieves better visual activation and superior performance in mathematical multimodal reasoning and visual-dependent tasks through Visual Attention Compensation and dual-grained advantage re-weighting.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That amplifying visual cues via similarity metrics and re-weighting advantages based on visual activation will reliably counteract forgetting without destabilizing the policy or degrading text-based reasoning performance.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"VGPO introduces visual attention compensation and dual-grained advantage re-weighting to reinforce visual focus in VLMs, yielding better activation and performance on multimodal reasoning tasks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"7f0f4dd3ad5990561ba7174fae84cd0b513a1be8960b2d3f606cab16205fdffd"},"source":{"id":"2604.09349","kind":"arxiv","version":2},"verdict":{"id":"c327febb-a0b5-40f6-bee4-9cd0aef702e2","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T17:20:04.694682Z","strongest_claim":"VGPO achieves better visual activation and superior performance in mathematical multimodal reasoning and visual-dependent tasks through Visual Attention Compensation and dual-grained advantage re-weighting.","one_line_summary":"VGPO introduces visual attention compensation and dual-grained advantage re-weighting to reinforce visual focus in VLMs, yielding better activation and performance on multimodal reasoning tasks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That amplifying visual cues via similarity metrics and re-weighting advantages based on visual activation will reliably counteract forgetting without destabilizing the policy or degrading text-based reasoning performance.","pith_extraction_headline":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.09349/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"1e958235f693395773e5fa481fa2f23d33f3dba8f34deed6448ff1ba6b12a90f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.09349","created_at":"2026-05-25T02:01:19.335489+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.09349v2","created_at":"2026-05-25T02:01:19.335489+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.09349","created_at":"2026-05-25T02:01:19.335489+00:00"},{"alias_kind":"pith_short_12","alias_value":"X2JCNQCIFEJG","created_at":"2026-05-25T02:01:19.335489+00:00"},{"alias_kind":"pith_short_16","alias_value":"X2JCNQCIFEJGFQUR","created_at":"2026-05-25T02:01:19.335489+00:00"},{"alias_kind":"pith_short_8","alias_value":"X2JCNQCI","created_at":"2026-05-25T02:01:19.335489+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.07274","citing_title":"Structured Role-Aware Policy Optimization for Multimodal Reasoning","ref_index":14,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW","json":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW.json","graph_json":"https://pith.science/api/pith-number/X2JCNQCIFEJGFQURVA4LLQQ6YW/graph.json","events_json":"https://pith.science/api/pith-number/X2JCNQCIFEJGFQURVA4LLQQ6YW/events.json","paper":"https://pith.science/paper/X2JCNQCI"},"agent_actions":{"view_html":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW","download_json":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW.json","view_paper":"https://pith.science/paper/X2JCNQCI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.09349&json=true","fetch_graph":"https://pith.science/api/pith-number/X2JCNQCIFEJGFQURVA4LLQQ6YW/graph.json","fetch_events":"https://pith.science/api/pith-number/X2JCNQCIFEJGFQURVA4LLQQ6YW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW/action/storage_attestation","attest_author":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW/action/author_attestation","sign_citation":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW/action/citation_signature","submit_replication":"https://pith.science/pith/X2JCNQCIFEJGFQURVA4LLQQ6YW/action/replication_record"}},"created_at":"2026-05-25T02:01:19.335489+00:00","updated_at":"2026-05-25T02:01:19.335489+00:00"}