{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:UGZKVVX66NAYVVALOS7KV6QQA7","short_pith_number":"pith:UGZKVVX6","canonical_record":{"source":{"id":"2605.26520","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T04:07:49Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3c437204268ef94e5212aa8a418e06b76fa7d9fce6dce9dfbb1bded888ae8601","abstract_canon_sha256":"6b8a02df3ac6533a51e26a6766e00e1967bfb1aac15e317545a42d250c32b183"},"schema_version":"1.0"},"canonical_sha256":"a1b2aad6fef3418ad40b74beaafa1007ef2aa469bade697951b3efb8bd6f47f1","source":{"kind":"arxiv","id":"2605.26520","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.26520","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"arxiv_version","alias_value":"2605.26520v1","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26520","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"pith_short_12","alias_value":"UGZKVVX66NAY","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"pith_short_16","alias_value":"UGZKVVX66NAYVVAL","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"pith_short_8","alias_value":"UGZKVVX6","created_at":"2026-05-27T01:05:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:UGZKVVX66NAYVVALOS7KV6QQA7","target":"record","payload":{"canonical_record":{"source":{"id":"2605.26520","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T04:07:49Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3c437204268ef94e5212aa8a418e06b76fa7d9fce6dce9dfbb1bded888ae8601","abstract_canon_sha256":"6b8a02df3ac6533a51e26a6766e00e1967bfb1aac15e317545a42d250c32b183"},"schema_version":"1.0"},"canonical_sha256":"a1b2aad6fef3418ad40b74beaafa1007ef2aa469bade697951b3efb8bd6f47f1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:05:24.851088Z","signature_b64":"RmEQtxIoJrf02iJgVm41ywwIs4psJ2SPKtLIe3QnoNCCp5ATKe9cM6P1QI4XcjFgh+kqJnYTU4OwadW98kBLDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a1b2aad6fef3418ad40b74beaafa1007ef2aa469bade697951b3efb8bd6f47f1","last_reissued_at":"2026-05-27T01:05:24.850396Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:05:24.850396Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.26520","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:05:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cUtS8QzpKM/5SF8c/wIn+016SJfaIiFiyzIbs5Q81fjf+HOSlu6krWCmRgMf6BEesNrtwOcQQ1FCjJ2PG/p5Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T00:42:13.368252Z"},"content_sha256":"18f2a3195b58d96fa99a97cf27b3e234842f58525a95ab28090a1e4fc74a4a6e","schema_version":"1.0","event_id":"sha256:18f2a3195b58d96fa99a97cf27b3e234842f58525a95ab28090a1e4fc74a4a6e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:UGZKVVX66NAYVVALOS7KV6QQA7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"InterSketch: An Interleaved Reasoning Model with Self-correcting Visual Sketch and Stepwise Reward","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"HanMing Deng, Jie Yang, Jingcheng Ni, Jixuan Ying, Lewei Lu, Shengnan Ma, Tao Hu, Wei Liu, Wenwen Tong, Xiangli Kong, Yong Xien Chng, Yuanjie Zheng, Zehuan Wu, Zhiwei Ning, Ziyi Shang","submitted_at":"2026-05-26T04:07:49Z","abstract_excerpt":"While vision-language models (VLMs) have exhibited multi-turn visual reasoning capabilities, their reasoning trajectories remain relatively shallow and are dominated by a text-centric paradigm, limiting their applicability to complex visual challenges. In contrast, human-like thought typically involves long-horizon reasoning with an interleaved visual-textual chain-of-thought (VT-CoT). To bridge this gap, we introduce InterSketch, an interleaved reasoning model to enhance the VT-CoT capability via self-correcting and stepwise reward mechanisms. InterSketch dynamically generates intermediate vi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26520","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.26520/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:05:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lcdnYbakXb8GjZwUYSyDgShlABtomps/l4Js0emW3DH5aRoeK/EDxBORkEWQtoH5yt+0/lcPIRoPKwaJDISpDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T00:42:13.368655Z"},"content_sha256":"25756df6aa806f1cc2a20fd4d08a21cdf4ef3340ea703d133150cb549646f917","schema_version":"1.0","event_id":"sha256:25756df6aa806f1cc2a20fd4d08a21cdf4ef3340ea703d133150cb549646f917"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UGZKVVX66NAYVVALOS7KV6QQA7/bundle.json","state_url":"https://pith.science/pith/UGZKVVX66NAYVVALOS7KV6QQA7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UGZKVVX66NAYVVALOS7KV6QQA7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T00:42:13Z","links":{"resolver":"https://pith.science/pith/UGZKVVX66NAYVVALOS7KV6QQA7","bundle":"https://pith.science/pith/UGZKVVX66NAYVVALOS7KV6QQA7/bundle.json","state":"https://pith.science/pith/UGZKVVX66NAYVVALOS7KV6QQA7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UGZKVVX66NAYVVALOS7KV6QQA7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:UGZKVVX66NAYVVALOS7KV6QQA7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6b8a02df3ac6533a51e26a6766e00e1967bfb1aac15e317545a42d250c32b183","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T04:07:49Z","title_canon_sha256":"3c437204268ef94e5212aa8a418e06b76fa7d9fce6dce9dfbb1bded888ae8601"},"schema_version":"1.0","source":{"id":"2605.26520","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.26520","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"arxiv_version","alias_value":"2605.26520v1","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26520","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"pith_short_12","alias_value":"UGZKVVX66NAY","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"pith_short_16","alias_value":"UGZKVVX66NAYVVAL","created_at":"2026-05-27T01:05:24Z"},{"alias_kind":"pith_short_8","alias_value":"UGZKVVX6","created_at":"2026-05-27T01:05:24Z"}],"graph_snapshots":[{"event_id":"sha256:25756df6aa806f1cc2a20fd4d08a21cdf4ef3340ea703d133150cb549646f917","target":"graph","created_at":"2026-05-27T01:05:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.26520/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While vision-language models (VLMs) have exhibited multi-turn visual reasoning capabilities, their reasoning trajectories remain relatively shallow and are dominated by a text-centric paradigm, limiting their applicability to complex visual challenges. In contrast, human-like thought typically involves long-horizon reasoning with an interleaved visual-textual chain-of-thought (VT-CoT). To bridge this gap, we introduce InterSketch, an interleaved reasoning model to enhance the VT-CoT capability via self-correcting and stepwise reward mechanisms. InterSketch dynamically generates intermediate vi","authors_text":"HanMing Deng, Jie Yang, Jingcheng Ni, Jixuan Ying, Lewei Lu, Shengnan Ma, Tao Hu, Wei Liu, Wenwen Tong, Xiangli Kong, Yong Xien Chng, Yuanjie Zheng, Zehuan Wu, Zhiwei Ning, Ziyi Shang","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T04:07:49Z","title":"InterSketch: An Interleaved Reasoning Model with Self-correcting Visual Sketch and Stepwise Reward"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26520","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:18f2a3195b58d96fa99a97cf27b3e234842f58525a95ab28090a1e4fc74a4a6e","target":"record","created_at":"2026-05-27T01:05:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6b8a02df3ac6533a51e26a6766e00e1967bfb1aac15e317545a42d250c32b183","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T04:07:49Z","title_canon_sha256":"3c437204268ef94e5212aa8a418e06b76fa7d9fce6dce9dfbb1bded888ae8601"},"schema_version":"1.0","source":{"id":"2605.26520","kind":"arxiv","version":1}},"canonical_sha256":"a1b2aad6fef3418ad40b74beaafa1007ef2aa469bade697951b3efb8bd6f47f1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a1b2aad6fef3418ad40b74beaafa1007ef2aa469bade697951b3efb8bd6f47f1","first_computed_at":"2026-05-27T01:05:24.850396Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:05:24.850396Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RmEQtxIoJrf02iJgVm41ywwIs4psJ2SPKtLIe3QnoNCCp5ATKe9cM6P1QI4XcjFgh+kqJnYTU4OwadW98kBLDw==","signature_status":"signed_v1","signed_at":"2026-05-27T01:05:24.851088Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.26520","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:18f2a3195b58d96fa99a97cf27b3e234842f58525a95ab28090a1e4fc74a4a6e","sha256:25756df6aa806f1cc2a20fd4d08a21cdf4ef3340ea703d133150cb549646f917"],"state_sha256":"60d32eacd421d6618514c25789f5b2fb03f424b6633ba60bcf027296d3ee5d22"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZeRHpfx/Na5xDBo63jVepdR11RIqYVg+1+dQwZ3SG99zwyl7gK3EICqwTjf5lY+mU/X2eyGV0WltorOF/B3/Aw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T00:42:13.370816Z","bundle_sha256":"acafb1abb796b73a409a773fb2c2af0e84541664531357916094b582dc2f76f2"}}