{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CRCODHP7JH43OF7MZHHPAJNKJG","short_pith_number":"pith:CRCODHP7","schema_version":"1.0","canonical_sha256":"1444e19dff49f9b717ecc9cef025aa499ec0453b351b676c07ff1ca96f8f9fdc","source":{"kind":"arxiv","id":"2606.23206","version":1},"attestation_state":"computed","paper":{"title":"CFPO: Counterfactual Policy Optimization for Multimodal Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Guangjing Yang, Qicheng Lao, Wanran Sun, Xiaohu Wu, Zhangyuan Yu","submitted_at":"2026-06-22T11:51:51Z","abstract_excerpt":"Large Vision-Language Models (LVLMs) have demonstrated remarkable capabilities in multimodal reasoning. However, prevailing reinforcement learning (RL) paradigms lack explicit counterfactual enhancement and causal learning mechanisms. This fundamental deficiency results in severe grounding failures, manifesting as a tendency to ignore visual evidence in favor of language priors or exhibiting hallucination drift during long chain-of-thought reasoning. To address this root cause, we propose CounterFactual Policy Optimization (CFPO), a novel framework that enforces causal consistency between visu"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.23206","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-22T11:51:51Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"3255c09e5a95178d619bd326e8f422c565f9f72af5188d82e8d0675e84ae42fe","abstract_canon_sha256":"0a664036d31db884b203e4477436828b3685fc67e725847f09124abb4d21b67c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:14:12.939224Z","signature_b64":"kZOnt6rbPdFIMYDXAUANt0fNlxS3INySD8wjxDU0VviiV20aZ8VeA8WY0Oc9prMxuVWq5eZ+nU+SbJDZ0TapAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1444e19dff49f9b717ecc9cef025aa499ec0453b351b676c07ff1ca96f8f9fdc","last_reissued_at":"2026-06-23T03:14:12.938887Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:14:12.938887Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CFPO: Counterfactual Policy Optimization for Multimodal Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Guangjing Yang, Qicheng Lao, Wanran Sun, Xiaohu Wu, Zhangyuan Yu","submitted_at":"2026-06-22T11:51:51Z","abstract_excerpt":"Large Vision-Language Models (LVLMs) have demonstrated remarkable capabilities in multimodal reasoning. However, prevailing reinforcement learning (RL) paradigms lack explicit counterfactual enhancement and causal learning mechanisms. This fundamental deficiency results in severe grounding failures, manifesting as a tendency to ignore visual evidence in favor of language priors or exhibiting hallucination drift during long chain-of-thought reasoning. To address this root cause, we propose CounterFactual Policy Optimization (CFPO), a novel framework that enforces causal consistency between visu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23206","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.23206/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.23206","created_at":"2026-06-23T03:14:12.938944+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.23206v1","created_at":"2026-06-23T03:14:12.938944+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23206","created_at":"2026-06-23T03:14:12.938944+00:00"},{"alias_kind":"pith_short_12","alias_value":"CRCODHP7JH43","created_at":"2026-06-23T03:14:12.938944+00:00"},{"alias_kind":"pith_short_16","alias_value":"CRCODHP7JH43OF7M","created_at":"2026-06-23T03:14:12.938944+00:00"},{"alias_kind":"pith_short_8","alias_value":"CRCODHP7","created_at":"2026-06-23T03:14:12.938944+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG","json":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG.json","graph_json":"https://pith.science/api/pith-number/CRCODHP7JH43OF7MZHHPAJNKJG/graph.json","events_json":"https://pith.science/api/pith-number/CRCODHP7JH43OF7MZHHPAJNKJG/events.json","paper":"https://pith.science/paper/CRCODHP7"},"agent_actions":{"view_html":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG","download_json":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG.json","view_paper":"https://pith.science/paper/CRCODHP7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.23206&json=true","fetch_graph":"https://pith.science/api/pith-number/CRCODHP7JH43OF7MZHHPAJNKJG/graph.json","fetch_events":"https://pith.science/api/pith-number/CRCODHP7JH43OF7MZHHPAJNKJG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG/action/storage_attestation","attest_author":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG/action/author_attestation","sign_citation":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG/action/citation_signature","submit_replication":"https://pith.science/pith/CRCODHP7JH43OF7MZHHPAJNKJG/action/replication_record"}},"created_at":"2026-06-23T03:14:12.938944+00:00","updated_at":"2026-06-23T03:14:12.938944+00:00"}