{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:3BQGMULEYEUFJCID75TMSLWB26","short_pith_number":"pith:3BQGMULE","schema_version":"1.0","canonical_sha256":"d860665164c128548903ff66c92ec1d7b845237dcd3b6fda52cf0115ff34bdda","source":{"kind":"arxiv","id":"2606.08464","version":1},"attestation_state":"computed","paper":{"title":"TVI-CoT: Text-Visual Interleaved Chain-of-Thought Reasoning for Multimodal Understanding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Lianyu Hu, Xiaoyu Ma, Yang Liu, Zeqin Liao","submitted_at":"2026-06-07T05:58:39Z","abstract_excerpt":"Chain-of-thought (CoT) reasoning has proven effective for enhancing problem-solving in large language models. However, when applied to multimodal LLMs (MLLMs), existing CoT approaches suffer from a fundamental limitation: they perform reasoning entirely in text without accessing visual features during the reasoning process. After initial visual encoding, image information becomes inaccessible, forcing models to reason based solely on whatever was captured in the initial description, which forms a `vision-blind reasoning' paradigm that limits fine-grained visual extraction, error verification, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.08464","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T05:58:39Z","cross_cats_sorted":[],"title_canon_sha256":"37cd380912b5991f0b7a92b6b0b365cbd4e60c6aca004dc6a8c7b4cbd6088252","abstract_canon_sha256":"2494c2ac9420797e022369bcf8eac3832fcf595817fd7a0bb8e186fb70d733aa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:37.284258Z","signature_b64":"IkYOPXEnfAC7wCgTQtJfXorNNhPWtjyiObYZb0Nmf/WdyHiYIl+qyKbtoMje2F+Mplbcf4PhUxACkEMKN5jFCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d860665164c128548903ff66c92ec1d7b845237dcd3b6fda52cf0115ff34bdda","last_reissued_at":"2026-06-09T01:05:37.283879Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:37.283879Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TVI-CoT: Text-Visual Interleaved Chain-of-Thought Reasoning for Multimodal Understanding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Lianyu Hu, Xiaoyu Ma, Yang Liu, Zeqin Liao","submitted_at":"2026-06-07T05:58:39Z","abstract_excerpt":"Chain-of-thought (CoT) reasoning has proven effective for enhancing problem-solving in large language models. However, when applied to multimodal LLMs (MLLMs), existing CoT approaches suffer from a fundamental limitation: they perform reasoning entirely in text without accessing visual features during the reasoning process. After initial visual encoding, image information becomes inaccessible, forcing models to reason based solely on whatever was captured in the initial description, which forms a `vision-blind reasoning' paradigm that limits fine-grained visual extraction, error verification, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08464","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08464/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.08464","created_at":"2026-06-09T01:05:37.283939+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.08464v1","created_at":"2026-06-09T01:05:37.283939+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08464","created_at":"2026-06-09T01:05:37.283939+00:00"},{"alias_kind":"pith_short_12","alias_value":"3BQGMULEYEUF","created_at":"2026-06-09T01:05:37.283939+00:00"},{"alias_kind":"pith_short_16","alias_value":"3BQGMULEYEUFJCID","created_at":"2026-06-09T01:05:37.283939+00:00"},{"alias_kind":"pith_short_8","alias_value":"3BQGMULE","created_at":"2026-06-09T01:05:37.283939+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26","json":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26.json","graph_json":"https://pith.science/api/pith-number/3BQGMULEYEUFJCID75TMSLWB26/graph.json","events_json":"https://pith.science/api/pith-number/3BQGMULEYEUFJCID75TMSLWB26/events.json","paper":"https://pith.science/paper/3BQGMULE"},"agent_actions":{"view_html":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26","download_json":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26.json","view_paper":"https://pith.science/paper/3BQGMULE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.08464&json=true","fetch_graph":"https://pith.science/api/pith-number/3BQGMULEYEUFJCID75TMSLWB26/graph.json","fetch_events":"https://pith.science/api/pith-number/3BQGMULEYEUFJCID75TMSLWB26/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26/action/storage_attestation","attest_author":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26/action/author_attestation","sign_citation":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26/action/citation_signature","submit_replication":"https://pith.science/pith/3BQGMULEYEUFJCID75TMSLWB26/action/replication_record"}},"created_at":"2026-06-09T01:05:37.283939+00:00","updated_at":"2026-06-09T01:05:37.283939+00:00"}