{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PEB7KHZBUFDVMJGTATS4TKM3B3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c232160f0797ae950e525d96cb59643dcc2b03a9e1f25f346b790c78a0bba859","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T16:29:49Z","title_canon_sha256":"0d0ad1ec68a836f42caeede977b225fc9587c75b163114ee8360a980014ec78e"},"schema_version":"1.0","source":{"id":"2606.08719","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08719","created_at":"2026-06-09T02:07:35Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08719v1","created_at":"2026-06-09T02:07:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08719","created_at":"2026-06-09T02:07:35Z"},{"alias_kind":"pith_short_12","alias_value":"PEB7KHZBUFDV","created_at":"2026-06-09T02:07:35Z"},{"alias_kind":"pith_short_16","alias_value":"PEB7KHZBUFDVMJGT","created_at":"2026-06-09T02:07:35Z"},{"alias_kind":"pith_short_8","alias_value":"PEB7KHZB","created_at":"2026-06-09T02:07:35Z"}],"graph_snapshots":[{"event_id":"sha256:823ad571d1b922ff6488ac76b66a0c4cabd31b01f183bd8256424d591fe6a482","target":"graph","created_at":"2026-06-09T02:07:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.08719/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"''Thinking with Images'' has emerged as an effective paradigm for fine-grained visual reasoning: by explicitly zooming into relevant regions and reasoning over crops, models can access local evidence that is difficult to recover from a single global image. However, this benefit comes with redundant tool invocations and longer inference traces. Moreover, when such behaviors are learned mainly from outcome reward, the resulting intermediate crops or visual cues can be noisy or fail to faithfully capture task-relevant visual evidence. In this work, we ask whether the reasoning benefits of ''Think","authors_text":"Haobo Deng, Haoli Bai, Jiahui Liu, Kun Ouyang, Linli Yao, Xiaohui Li, Xu Sun, Yishuo Cai, Yuanxin Liu, Yuhao Zheng, Zhimo Li, Ziyue Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T16:29:49Z","title":"Thinking Without Images: Internalizing Visual Manipulation with On-Policy Self-Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08719","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c1751bdca554dd2644eaffe77672b059ebf54f504102947267b9b1c625aafd80","target":"record","created_at":"2026-06-09T02:07:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c232160f0797ae950e525d96cb59643dcc2b03a9e1f25f346b790c78a0bba859","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T16:29:49Z","title_canon_sha256":"0d0ad1ec68a836f42caeede977b225fc9587c75b163114ee8360a980014ec78e"},"schema_version":"1.0","source":{"id":"2606.08719","kind":"arxiv","version":1}},"canonical_sha256":"7903f51f21a1475624d304e5c9a99b0eeaf1b6614113896315556a5ebf82a2f5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7903f51f21a1475624d304e5c9a99b0eeaf1b6614113896315556a5ebf82a2f5","first_computed_at":"2026-06-09T02:07:35.188377Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:35.188377Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3mvoB6py7rBAKhDjC86lAef8RvW1afdmo/q7P9kZ62t0wMGKu6H2mKC820CMfhjslwfw6EnJh6ClAUFCuVaACg==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:35.190966Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.08719","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c1751bdca554dd2644eaffe77672b059ebf54f504102947267b9b1c625aafd80","sha256:823ad571d1b922ff6488ac76b66a0c4cabd31b01f183bd8256424d591fe6a482"],"state_sha256":"8254b8fce49d9591f52fe595efb05570511247b4cb3b0368592bb09fd28643b6"}