{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:K6JGKNLIUJKGSEVPTRP3LSOXJG","short_pith_number":"pith:K6JGKNLI","canonical_record":{"source":{"id":"2505.08617","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-05-13T14:35:51Z","cross_cats_sorted":[],"title_canon_sha256":"3ba2bef8bf3e1210a9ca37ae961fff081ff482833b3e6e2b4b0ceb9e86148ac2","abstract_canon_sha256":"d3cec06cfc35f7bb27006d3c59e03cf69c4435b409847f692a8c93cf6d4e4e2c"},"schema_version":"1.0"},"canonical_sha256":"5792653568a2546912af9c5fb5c9d749b47e3aac31f3591d0f53730ce5221e15","source":{"kind":"arxiv","id":"2505.08617","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.08617","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2505.08617v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.08617","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"K6JGKNLIUJKG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"K6JGKNLIUJKGSEVP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"K6JGKNLI","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:K6JGKNLIUJKGSEVPTRP3LSOXJG","target":"record","payload":{"canonical_record":{"source":{"id":"2505.08617","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-05-13T14:35:51Z","cross_cats_sorted":[],"title_canon_sha256":"3ba2bef8bf3e1210a9ca37ae961fff081ff482833b3e6e2b4b0ceb9e86148ac2","abstract_canon_sha256":"d3cec06cfc35f7bb27006d3c59e03cf69c4435b409847f692a8c93cf6d4e4e2c"},"schema_version":"1.0"},"canonical_sha256":"5792653568a2546912af9c5fb5c9d749b47e3aac31f3591d0f53730ce5221e15","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.421874Z","signature_b64":"TCIBAnFdFihd2r5nBXntN5TrgxsDf7kPwgfz8i4MBR/IPQTJOchPEfyEh4i0x9oUouyctiN0HwWvYyADHgP0CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5792653568a2546912af9c5fb5c9d749b47e3aac31f3591d0f53730ce5221e15","last_reissued_at":"2026-05-17T23:38:46.421271Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.421271Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2505.08617","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"r0GBm7RRDeeFtGHf5K9bZ4IqgfswYAh1g9g9YXEtN9z7v5DXQmXsCCyWD5D/jU2XOtPE0+w6X739AiIMrQJcDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T13:20:22.932837Z"},"content_sha256":"55522dc73e7c92fbce749fbd9ff696eb018d7b3105055782c5091aede9418c45","schema_version":"1.0","event_id":"sha256:55522dc73e7c92fbce749fbd9ff696eb018d7b3105055782c5091aede9418c45"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:K6JGKNLIUJKGSEVPTRP3LSOXJG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"OpenThinkIMG: Learning to Think with Images via Visual Tool Reinforcement Learning","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"Reinforcement learning on visual tool feedback lets a small LVLM learn adaptive tool-use policies that outperform supervised training and some larger models on chart reasoning.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Guanjie Chen, Jiawei Gu, Juntao Li, Jun Zhang, Linjie Li, Mingyang Song, Xiaoye Qu, Yu Cheng, Yunzhuo Hao, Zhaochen Su, Zhengyuan Yang","submitted_at":"2025-05-13T14:35:51Z","abstract_excerpt":"While humans can flexibly leverage interactive visual cognition for complex problem-solving, enabling Large Vision-Language Models (LVLMs) to learn similarly adaptive behaviors with visual tools remains challenging. A significant hurdle is the current lack of standardized infrastructure, which hinders integrating diverse tools, generating rich interaction data, and training robust agents effectively. To address these gaps, we introduce OpenThinkIMG, the first open-source, comprehensive end-to-end framework for tool-augmented LVLMs. It features standardized vision tool interfaces, scalable traj"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our RL-trained agent, built upon a Qwen2-VL-2B, significantly outperforms its SFT-initialized counterpart (+28.83 points) and surpasses established supervised tool-learning baselines like Taco and CogCom by an average of +12.7 points. Notably, it also surpasses prominent closed-source models like GPT-4.1 by +8.68 accuracy points.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The assumption that feedback from tool interactions on chart reasoning tasks will produce policies that generalize to other visual domains and tool sets without additional tuning or domain-specific reward shaping.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"OpenThinkIMG and V-ToolRL enable LVLMs to learn adaptive visual tool use via RL, yielding a Qwen2-VL-2B agent that beats its SFT version by 28.83 points and GPT-4.1 by 8.68 points on chart reasoning.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Reinforcement learning on visual tool feedback lets a small LVLM learn adaptive tool-use policies that outperform supervised training and some larger models on chart reasoning.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"fb754c29782ff7029a4d9d066f2970e93b83b2eac7cb39a55770a9f0290492ca"},"source":{"id":"2505.08617","kind":"arxiv","version":2},"verdict":{"id":"4d787b5f-af64-445b-b60a-144d46e6c0b1","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T22:07:29.872962Z","strongest_claim":"Our RL-trained agent, built upon a Qwen2-VL-2B, significantly outperforms its SFT-initialized counterpart (+28.83 points) and surpasses established supervised tool-learning baselines like Taco and CogCom by an average of +12.7 points. Notably, it also surpasses prominent closed-source models like GPT-4.1 by +8.68 accuracy points.","one_line_summary":"OpenThinkIMG and V-ToolRL enable LVLMs to learn adaptive visual tool use via RL, yielding a Qwen2-VL-2B agent that beats its SFT version by 28.83 points and GPT-4.1 by 8.68 points on chart reasoning.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The assumption that feedback from tool interactions on chart reasoning tasks will produce policies that generalize to other visual domains and tool sets without additional tuning or domain-specific reward shaping.","pith_extraction_headline":"Reinforcement learning on visual tool feedback lets a small LVLM learn adaptive tool-use policies that outperform supervised training and some larger models on chart reasoning."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f7afe6eec8f19d01b457ed1e666d8f8853c913b0fa330dfc1e156fa0076274aa"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"4d787b5f-af64-445b-b60a-144d46e6c0b1"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SYAeLu7YNn5o8OjsapMb/InuxIpzuzuFgaRL8rRy782IlW86GMqdom3nnObZi6YgyEfYdNApk58IP8VmdmecCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T13:20:22.933276Z"},"content_sha256":"a8808271184b7038bf12fb319794d5765d32a00005df52cc7ce8a5ed22cf1668","schema_version":"1.0","event_id":"sha256:a8808271184b7038bf12fb319794d5765d32a00005df52cc7ce8a5ed22cf1668"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG/bundle.json","state_url":"https://pith.science/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T13:20:22Z","links":{"resolver":"https://pith.science/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG","bundle":"https://pith.science/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG/bundle.json","state":"https://pith.science/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/K6JGKNLIUJKGSEVPTRP3LSOXJG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:K6JGKNLIUJKGSEVPTRP3LSOXJG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d3cec06cfc35f7bb27006d3c59e03cf69c4435b409847f692a8c93cf6d4e4e2c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-05-13T14:35:51Z","title_canon_sha256":"3ba2bef8bf3e1210a9ca37ae961fff081ff482833b3e6e2b4b0ceb9e86148ac2"},"schema_version":"1.0","source":{"id":"2505.08617","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.08617","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2505.08617v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.08617","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"K6JGKNLIUJKG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"K6JGKNLIUJKGSEVP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"K6JGKNLI","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:a8808271184b7038bf12fb319794d5765d32a00005df52cc7ce8a5ed22cf1668","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our RL-trained agent, built upon a Qwen2-VL-2B, significantly outperforms its SFT-initialized counterpart (+28.83 points) and surpasses established supervised tool-learning baselines like Taco and CogCom by an average of +12.7 points. Notably, it also surpasses prominent closed-source models like GPT-4.1 by +8.68 accuracy points."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The assumption that feedback from tool interactions on chart reasoning tasks will produce policies that generalize to other visual domains and tool sets without additional tuning or domain-specific reward shaping."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"OpenThinkIMG and V-ToolRL enable LVLMs to learn adaptive visual tool use via RL, yielding a Qwen2-VL-2B agent that beats its SFT version by 28.83 points and GPT-4.1 by 8.68 points on chart reasoning."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning on visual tool feedback lets a small LVLM learn adaptive tool-use policies that outperform supervised training and some larger models on chart reasoning."}],"snapshot_sha256":"fb754c29782ff7029a4d9d066f2970e93b83b2eac7cb39a55770a9f0290492ca"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f7afe6eec8f19d01b457ed1e666d8f8853c913b0fa330dfc1e156fa0076274aa"},"paper":{"abstract_excerpt":"While humans can flexibly leverage interactive visual cognition for complex problem-solving, enabling Large Vision-Language Models (LVLMs) to learn similarly adaptive behaviors with visual tools remains challenging. A significant hurdle is the current lack of standardized infrastructure, which hinders integrating diverse tools, generating rich interaction data, and training robust agents effectively. To address these gaps, we introduce OpenThinkIMG, the first open-source, comprehensive end-to-end framework for tool-augmented LVLMs. It features standardized vision tool interfaces, scalable traj","authors_text":"Guanjie Chen, Jiawei Gu, Juntao Li, Jun Zhang, Linjie Li, Mingyang Song, Xiaoye Qu, Yu Cheng, Yunzhuo Hao, Zhaochen Su, Zhengyuan Yang","cross_cats":[],"headline":"Reinforcement learning on visual tool feedback lets a small LVLM learn adaptive tool-use policies that outperform supervised training and some larger models on chart reasoning.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-05-13T14:35:51Z","title":"OpenThinkIMG: Learning to Think with Images via Visual Tool Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.08617","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T22:07:29.872962Z","id":"4d787b5f-af64-445b-b60a-144d46e6c0b1","model_set":{"reader":"grok-4.3"},"one_line_summary":"OpenThinkIMG and V-ToolRL enable LVLMs to learn adaptive visual tool use via RL, yielding a Qwen2-VL-2B agent that beats its SFT version by 28.83 points and GPT-4.1 by 8.68 points on chart reasoning.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning on visual tool feedback lets a small LVLM learn adaptive tool-use policies that outperform supervised training and some larger models on chart reasoning.","strongest_claim":"Our RL-trained agent, built upon a Qwen2-VL-2B, significantly outperforms its SFT-initialized counterpart (+28.83 points) and surpasses established supervised tool-learning baselines like Taco and CogCom by an average of +12.7 points. Notably, it also surpasses prominent closed-source models like GPT-4.1 by +8.68 accuracy points.","weakest_assumption":"The assumption that feedback from tool interactions on chart reasoning tasks will produce policies that generalize to other visual domains and tool sets without additional tuning or domain-specific reward shaping."}},"verdict_id":"4d787b5f-af64-445b-b60a-144d46e6c0b1"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:55522dc73e7c92fbce749fbd9ff696eb018d7b3105055782c5091aede9418c45","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d3cec06cfc35f7bb27006d3c59e03cf69c4435b409847f692a8c93cf6d4e4e2c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-05-13T14:35:51Z","title_canon_sha256":"3ba2bef8bf3e1210a9ca37ae961fff081ff482833b3e6e2b4b0ceb9e86148ac2"},"schema_version":"1.0","source":{"id":"2505.08617","kind":"arxiv","version":2}},"canonical_sha256":"5792653568a2546912af9c5fb5c9d749b47e3aac31f3591d0f53730ce5221e15","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5792653568a2546912af9c5fb5c9d749b47e3aac31f3591d0f53730ce5221e15","first_computed_at":"2026-05-17T23:38:46.421271Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.421271Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"TCIBAnFdFihd2r5nBXntN5TrgxsDf7kPwgfz8i4MBR/IPQTJOchPEfyEh4i0x9oUouyctiN0HwWvYyADHgP0CA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.421874Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.08617","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:55522dc73e7c92fbce749fbd9ff696eb018d7b3105055782c5091aede9418c45","sha256:a8808271184b7038bf12fb319794d5765d32a00005df52cc7ce8a5ed22cf1668"],"state_sha256":"640e4ce618af0d7fd4bf3427375db16f84079c04e2265112eca1765d424344c8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Os8a2vuPv1kEr19k3KVoAnEf5MllsexvPXr4xiH4K+Dzo3+fYX98jpQgR9SMDcQ8MhEuht5mUobL9zF8uAgVAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T13:20:22.935513Z","bundle_sha256":"8459756024a4b8b743f3bf38777ce54439186ed82a021bf312e4a0ce92c06ff5"}}