{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:YRDHXJQD42DAWRYYHVKYI5RHPG","short_pith_number":"pith:YRDHXJQD","schema_version":"1.0","canonical_sha256":"c4467ba603e6860b47183d558476277985c95b2c14fe37e64ae3a53d8e443f23","source":{"kind":"arxiv","id":"2511.17731","version":2},"attestation_state":"computed","paper":{"title":"VisReason: A Large-Scale Dataset for Visual Chain-of-Thought Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CV","authors_text":"Chen Tang, Chenyu You, Lingxiao Li, Xiangyu Yue, Xinyan Gao, Yifan Wang","submitted_at":"2025-11-21T19:30:24Z","abstract_excerpt":"Chain-of-Thought (CoT) prompting has proven remarkably effective for eliciting complex reasoning in large language models (LLMs). Yet, its potential in multimodal large language models (MLLMs) remains largely untapped, hindered by the absence of large-scale datasets that capture the rich, spatially grounded reasoning intrinsic to visual understanding. Existing visual-CoT resources are typically small, domain-specific, or lack the human-like stepwise structure necessary for compositional visual reasoning. In this paper, we introduce VisReason, a large-scale dataset designed to advance visual Ch"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2511.17731","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-11-21T19:30:24Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"6a32ed49f8d6f48279419c9323f9b97368ec4c07a9c59c09bbe5f90de5b2fc20","abstract_canon_sha256":"cee675141c2af17ba724a3d302ffdea15a28890c17b29a6af3a6e7f892e91c9e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T00:18:26.227955Z","signature_b64":"6+77S3to9kLZ0ysind8N+U8Lv9GOQTG+78QuQJEHsUgfkSAzx9QOWrVVGvdR0dhFQLxK+kV6jQa9G3wh8XtjAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c4467ba603e6860b47183d558476277985c95b2c14fe37e64ae3a53d8e443f23","last_reissued_at":"2026-07-02T00:18:26.227311Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T00:18:26.227311Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"VisReason: A Large-Scale Dataset for Visual Chain-of-Thought Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CV","authors_text":"Chen Tang, Chenyu You, Lingxiao Li, Xiangyu Yue, Xinyan Gao, Yifan Wang","submitted_at":"2025-11-21T19:30:24Z","abstract_excerpt":"Chain-of-Thought (CoT) prompting has proven remarkably effective for eliciting complex reasoning in large language models (LLMs). Yet, its potential in multimodal large language models (MLLMs) remains largely untapped, hindered by the absence of large-scale datasets that capture the rich, spatially grounded reasoning intrinsic to visual understanding. Existing visual-CoT resources are typically small, domain-specific, or lack the human-like stepwise structure necessary for compositional visual reasoning. In this paper, we introduce VisReason, a large-scale dataset designed to advance visual Ch"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.17731","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.17731/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.17731","created_at":"2026-07-02T00:18:26.227416+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.17731v2","created_at":"2026-07-02T00:18:26.227416+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.17731","created_at":"2026-07-02T00:18:26.227416+00:00"},{"alias_kind":"pith_short_12","alias_value":"YRDHXJQD42DA","created_at":"2026-07-02T00:18:26.227416+00:00"},{"alias_kind":"pith_short_16","alias_value":"YRDHXJQD42DAWRYY","created_at":"2026-07-02T00:18:26.227416+00:00"},{"alias_kind":"pith_short_8","alias_value":"YRDHXJQD","created_at":"2026-07-02T00:18:26.227416+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.20177","citing_title":"From Seeing to Thinking: Decoupling Perception and Reasoning Improves Post-Training of Vision-Language Models","ref_index":14,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG","json":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG.json","graph_json":"https://pith.science/api/pith-number/YRDHXJQD42DAWRYYHVKYI5RHPG/graph.json","events_json":"https://pith.science/api/pith-number/YRDHXJQD42DAWRYYHVKYI5RHPG/events.json","paper":"https://pith.science/paper/YRDHXJQD"},"agent_actions":{"view_html":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG","download_json":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG.json","view_paper":"https://pith.science/paper/YRDHXJQD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.17731&json=true","fetch_graph":"https://pith.science/api/pith-number/YRDHXJQD42DAWRYYHVKYI5RHPG/graph.json","fetch_events":"https://pith.science/api/pith-number/YRDHXJQD42DAWRYYHVKYI5RHPG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG/action/storage_attestation","attest_author":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG/action/author_attestation","sign_citation":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG/action/citation_signature","submit_replication":"https://pith.science/pith/YRDHXJQD42DAWRYYHVKYI5RHPG/action/replication_record"}},"created_at":"2026-07-02T00:18:26.227416+00:00","updated_at":"2026-07-02T00:18:26.227416+00:00"}