{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:MEYQMSYHQQLOTFFRN3IZ5SZNCC","short_pith_number":"pith:MEYQMSYH","canonical_record":{"source":{"id":"2607.02490","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T17:53:15Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"26d60049a2b08ffb6946c547ce780084e0a3ac93b321daba506b733b7ef2eb71","abstract_canon_sha256":"cfba4da487343b0b80637db6a9fb7c1a7a0d349e85f27868bb0b4a91e4691b76"},"schema_version":"1.0"},"canonical_sha256":"6131064b078416e994b16ed19ecb2d108616c6a388422e5e6dbfa5abc7cbb11c","source":{"kind":"arxiv","id":"2607.02490","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.02490","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"arxiv_version","alias_value":"2607.02490v1","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.02490","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"pith_short_12","alias_value":"MEYQMSYHQQLO","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"pith_short_16","alias_value":"MEYQMSYHQQLOTFFR","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"pith_short_8","alias_value":"MEYQMSYH","created_at":"2026-07-03T01:18:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:MEYQMSYHQQLOTFFRN3IZ5SZNCC","target":"record","payload":{"canonical_record":{"source":{"id":"2607.02490","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T17:53:15Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"26d60049a2b08ffb6946c547ce780084e0a3ac93b321daba506b733b7ef2eb71","abstract_canon_sha256":"cfba4da487343b0b80637db6a9fb7c1a7a0d349e85f27868bb0b4a91e4691b76"},"schema_version":"1.0"},"canonical_sha256":"6131064b078416e994b16ed19ecb2d108616c6a388422e5e6dbfa5abc7cbb11c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:18:00.712491Z","signature_b64":"fjzIObRQNU0MyXPV+/ZZwYS2qfs1n5tN6gxHr6bXKyX+xUY7cHyPmfUwlZRbxqL0npPd0SUrxcw25JxG18OxCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6131064b078416e994b16ed19ecb2d108616c6a388422e5e6dbfa5abc7cbb11c","last_reissued_at":"2026-07-03T01:18:00.711902Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:18:00.711902Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2607.02490","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-03T01:18:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CGM61ADD5E1PXa+Yqc0/WaCTHUoxA/kzwvzYXRZFmfyQuJx6w/N9FLo9ui/RzReDdrRlgNPHEvnB/FcdzPxeBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T06:34:35.527534Z"},"content_sha256":"a0d0e0909dbdf7b8f5b8600271685fe0000e55f3d9ad020d9a9891c28211b58c","schema_version":"1.0","event_id":"sha256:a0d0e0909dbdf7b8f5b8600271685fe0000e55f3d9ad020d9a9891c28211b58c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:MEYQMSYHQQLOTFFRN3IZ5SZNCC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Visually Grounded Self-Reflection for Vision-Language Models via Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Fangcong Yin, Greg Durrett, Liyan Tang","submitted_at":"2026-07-02T17:53:15Z","abstract_excerpt":"Large vision-language models can reason over multimodal inputs by generating textual chains of thought (CoT). A key capability exhibited in CoT reasoning is self-reflection: revisiting earlier decisions and correcting previous errors. However, existing LVLMs often fail to properly attend to visual inputs during reflection, limiting their ability to translate feedback into grounded corrections, especially for out-of-distribution images. To address this issue, we propose a novel reinforcement learning training framework VRRL, with two components explicitly designed to elicit visually grounded se"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.02490","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.02490/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-03T01:18:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J5nJU5z5U1N2a6QG+NrGJMfRCDrdn99byvKxhRBtpI2UMw0Cw8a/+P4FZr/Gal1/qt7Ng6P+8cRmqXrCB0XmBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T06:34:35.527910Z"},"content_sha256":"ef705483dbf4306750b5edde92fd969f21ca1baa7fb4733cdb9a3536fe269538","schema_version":"1.0","event_id":"sha256:ef705483dbf4306750b5edde92fd969f21ca1baa7fb4733cdb9a3536fe269538"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC/bundle.json","state_url":"https://pith.science/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T06:34:35Z","links":{"resolver":"https://pith.science/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC","bundle":"https://pith.science/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC/bundle.json","state":"https://pith.science/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MEYQMSYHQQLOTFFRN3IZ5SZNCC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:MEYQMSYHQQLOTFFRN3IZ5SZNCC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cfba4da487343b0b80637db6a9fb7c1a7a0d349e85f27868bb0b4a91e4691b76","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T17:53:15Z","title_canon_sha256":"26d60049a2b08ffb6946c547ce780084e0a3ac93b321daba506b733b7ef2eb71"},"schema_version":"1.0","source":{"id":"2607.02490","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.02490","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"arxiv_version","alias_value":"2607.02490v1","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.02490","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"pith_short_12","alias_value":"MEYQMSYHQQLO","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"pith_short_16","alias_value":"MEYQMSYHQQLOTFFR","created_at":"2026-07-03T01:18:00Z"},{"alias_kind":"pith_short_8","alias_value":"MEYQMSYH","created_at":"2026-07-03T01:18:00Z"}],"graph_snapshots":[{"event_id":"sha256:ef705483dbf4306750b5edde92fd969f21ca1baa7fb4733cdb9a3536fe269538","target":"graph","created_at":"2026-07-03T01:18:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.02490/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large vision-language models can reason over multimodal inputs by generating textual chains of thought (CoT). A key capability exhibited in CoT reasoning is self-reflection: revisiting earlier decisions and correcting previous errors. However, existing LVLMs often fail to properly attend to visual inputs during reflection, limiting their ability to translate feedback into grounded corrections, especially for out-of-distribution images. To address this issue, we propose a novel reinforcement learning training framework VRRL, with two components explicitly designed to elicit visually grounded se","authors_text":"Fangcong Yin, Greg Durrett, Liyan Tang","cross_cats":["cs.CV"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T17:53:15Z","title":"Visually Grounded Self-Reflection for Vision-Language Models via Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.02490","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a0d0e0909dbdf7b8f5b8600271685fe0000e55f3d9ad020d9a9891c28211b58c","target":"record","created_at":"2026-07-03T01:18:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cfba4da487343b0b80637db6a9fb7c1a7a0d349e85f27868bb0b4a91e4691b76","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T17:53:15Z","title_canon_sha256":"26d60049a2b08ffb6946c547ce780084e0a3ac93b321daba506b733b7ef2eb71"},"schema_version":"1.0","source":{"id":"2607.02490","kind":"arxiv","version":1}},"canonical_sha256":"6131064b078416e994b16ed19ecb2d108616c6a388422e5e6dbfa5abc7cbb11c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6131064b078416e994b16ed19ecb2d108616c6a388422e5e6dbfa5abc7cbb11c","first_computed_at":"2026-07-03T01:18:00.711902Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T01:18:00.711902Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fjzIObRQNU0MyXPV+/ZZwYS2qfs1n5tN6gxHr6bXKyX+xUY7cHyPmfUwlZRbxqL0npPd0SUrxcw25JxG18OxCw==","signature_status":"signed_v1","signed_at":"2026-07-03T01:18:00.712491Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.02490","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a0d0e0909dbdf7b8f5b8600271685fe0000e55f3d9ad020d9a9891c28211b58c","sha256:ef705483dbf4306750b5edde92fd969f21ca1baa7fb4733cdb9a3536fe269538"],"state_sha256":"094fd181cc1cf2bea5e25f370113ed4e5da07f9c60bff9e6e9d6b29d345f3a21"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"u5nyGZSJKAn00vhfDDHq11xgzi+iei6riDGAgb0Hfxk8rQ+B4Zqn7VOobbjhSn+Dv5Bv1heOUeqgG8Q50xNcAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T06:34:35.529878Z","bundle_sha256":"6f2b006bae8502d16bf1a6138c96e18b2178adcbec598c82ebe94926daf40473"}}