{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:X2JCNQCIFEJGFQURVA4LLQQ6YW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"04f0381805e71de659352f26fb10081fd34aa91146bd3d67d7fe419aaf9bfcc0","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-04-10T14:22:38Z","title_canon_sha256":"3de2e68d2591c2570c9a16df68c2550fd8b776ec39c042e4a3a0a72c534f99bb"},"schema_version":"1.0","source":{"id":"2604.09349","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.09349","created_at":"2026-05-25T02:01:19Z"},{"alias_kind":"arxiv_version","alias_value":"2604.09349v2","created_at":"2026-05-25T02:01:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.09349","created_at":"2026-05-25T02:01:19Z"},{"alias_kind":"pith_short_12","alias_value":"X2JCNQCIFEJG","created_at":"2026-05-25T02:01:19Z"},{"alias_kind":"pith_short_16","alias_value":"X2JCNQCIFEJGFQUR","created_at":"2026-05-25T02:01:19Z"},{"alias_kind":"pith_short_8","alias_value":"X2JCNQCI","created_at":"2026-05-25T02:01:19Z"}],"graph_snapshots":[{"event_id":"sha256:f23b35a8b5fd885d968646bcbbee2fd6d6991f0d16daec20eb7210b48ec1b0ee","target":"graph","created_at":"2026-05-25T02:01:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"VGPO achieves better visual activation and superior performance in mathematical multimodal reasoning and visual-dependent tasks through Visual Attention Compensation and dual-grained advantage re-weighting."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That amplifying visual cues via similarity metrics and re-weighting advantages based on visual activation will reliably counteract forgetting without destabilizing the policy or degrading text-based reasoning performance."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"VGPO introduces visual attention compensation and dual-grained advantage re-weighting to reinforce visual focus in VLMs, yielding better activation and performance on multimodal reasoning tasks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models."}],"snapshot_sha256":"7f0f4dd3ad5990561ba7174fae84cd0b513a1be8960b2d3f606cab16205fdffd"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"1e958235f693395773e5fa481fa2f23d33f3dba8f34deed6448ff1ba6b12a90f"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.09349/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has significantly advanced the reasoning ability of vision-language models (VLMs). However, the inherent text-dominated nature of VLMs often leads to insufficient visual faithfulness, characterized by sparse attention activation to visual tokens. More importantly, our empirical analysis reveals that temporal visual forgetting along reasoning steps exacerbates this deficiency. To bridge this gap, we propose Visually-Guided Policy Optimization (VGPO), a novel framework to reinforce visual focus during policy optimization. Specifically, VGPO i","authors_text":"Feng Xiong, Liang Lin, Man Zhang, Xiangxiang Chu, Xuecai Hu, Yanlin Wang, Yong Wang, Zengbin Wang","cross_cats":["cs.AI","cs.CL"],"headline":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-04-10T14:22:38Z","title":"Visually-Guided Policy Optimization for Multimodal Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.09349","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T17:20:04.694682Z","id":"c327febb-a0b5-40f6-bee4-9cd0aef702e2","model_set":{"reader":"grok-4.3"},"one_line_summary":"VGPO introduces visual attention compensation and dual-grained advantage re-weighting to reinforce visual focus in VLMs, yielding better activation and performance on multimodal reasoning tasks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Visually-guided policy optimization amplifies relevant visual tokens and reweights advantages to counter forgetting in vision-language models.","strongest_claim":"VGPO achieves better visual activation and superior performance in mathematical multimodal reasoning and visual-dependent tasks through Visual Attention Compensation and dual-grained advantage re-weighting.","weakest_assumption":"That amplifying visual cues via similarity metrics and re-weighting advantages based on visual activation will reliably counteract forgetting without destabilizing the policy or degrading text-based reasoning performance."}},"verdict_id":"c327febb-a0b5-40f6-bee4-9cd0aef702e2"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e8a233af481839b3d393d5ce408cdb78858cde071bf6b097490c875ef32452ff","target":"record","created_at":"2026-05-25T02:01:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"04f0381805e71de659352f26fb10081fd34aa91146bd3d67d7fe419aaf9bfcc0","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-04-10T14:22:38Z","title_canon_sha256":"3de2e68d2591c2570c9a16df68c2550fd8b776ec39c042e4a3a0a72c534f99bb"},"schema_version":"1.0","source":{"id":"2604.09349","kind":"arxiv","version":2}},"canonical_sha256":"be9226c048291262c291a838b5c21ec586a5b24f7cbf3d3b4ad86cfaa3ea426c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"be9226c048291262c291a838b5c21ec586a5b24f7cbf3d3b4ad86cfaa3ea426c","first_computed_at":"2026-05-25T02:01:19.335357Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:19.335357Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"oQTDq2zzTm11wImn0SwdnQ8edAjouIjMgE82NxPOxXs0NLsNB9Fton5JQeS71M+OZ5vOHOCWrpqYaM/QxPZCBQ==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:19.336275Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.09349","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e8a233af481839b3d393d5ce408cdb78858cde071bf6b097490c875ef32452ff","sha256:f23b35a8b5fd885d968646bcbbee2fd6d6991f0d16daec20eb7210b48ec1b0ee"],"state_sha256":"5f87bc6c37c0f2b4d70efaee467885faaec40ec8b332f83029822087b2223760"}