{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:3MPOGIXL24SUSB5LZC22VKILGU","short_pith_number":"pith:3MPOGIXL","schema_version":"1.0","canonical_sha256":"db1ee322ebd7254907abc8b5aaa90b352c53a5e5ba1a017606cd042c794eb554","source":{"kind":"arxiv","id":"2602.06566","version":3},"attestation_state":"computed","paper":{"title":"SPARC: Separating Perception And Reasoning Circuits for Test-time Scaling of VLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Hang Hua, Junling Wang, Konrad Schindler, Li Mi, Mattia Rigotti, Nayanika Debnath, Niccolo Avogaro, Thomas Frick, Zexue He","submitted_at":"2026-02-06T10:05:25Z","abstract_excerpt":"Despite recent successes, test-time scaling -- i.e., dynamically expanding the token budget during inference as needed -- remains brittle for vision-language models (VLMs). Unstructured visual reasoning chains entangle perception and reasoning, leading to long, disorganized contexts where small perceptual mistakes may cascade into completely wrong answers. Reasoning also requires expensive reinforcement learning with hand-crafted rewards. Here, we introduce SPARC (Separating Perception And Reasoning Circuits), a modular framework that explicitly decouples visual perception from reasoning. Insp"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.06566","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-06T10:05:25Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"e68a8aeaae7bb29e8722a445e652b514318f66f6baefec257eb68c2faa14b69f","abstract_canon_sha256":"c3bcb9364f7aab6ff571daa1d7243166dfdf1f75dd708c7509c0fb61f49c1974"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-25T01:18:36.996213Z","signature_b64":"mNxL0JaB5fbQH6qTUT/yYiZ/GCk5VuTQBbklzhoWkFRlXkCWLo3ai2T61tZJz0q/6F9FX+n8Y8QYkUs1IPihAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"db1ee322ebd7254907abc8b5aaa90b352c53a5e5ba1a017606cd042c794eb554","last_reissued_at":"2026-06-25T01:18:36.995730Z","signature_status":"signed_v1","first_computed_at":"2026-06-25T01:18:36.995730Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SPARC: Separating Perception And Reasoning Circuits for Test-time Scaling of VLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Hang Hua, Junling Wang, Konrad Schindler, Li Mi, Mattia Rigotti, Nayanika Debnath, Niccolo Avogaro, Thomas Frick, Zexue He","submitted_at":"2026-02-06T10:05:25Z","abstract_excerpt":"Despite recent successes, test-time scaling -- i.e., dynamically expanding the token budget during inference as needed -- remains brittle for vision-language models (VLMs). Unstructured visual reasoning chains entangle perception and reasoning, leading to long, disorganized contexts where small perceptual mistakes may cascade into completely wrong answers. Reasoning also requires expensive reinforcement learning with hand-crafted rewards. Here, we introduce SPARC (Separating Perception And Reasoning Circuits), a modular framework that explicitly decouples visual perception from reasoning. Insp"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.06566","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.06566/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.06566","created_at":"2026-06-25T01:18:36.995786+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.06566v3","created_at":"2026-06-25T01:18:36.995786+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.06566","created_at":"2026-06-25T01:18:36.995786+00:00"},{"alias_kind":"pith_short_12","alias_value":"3MPOGIXL24SU","created_at":"2026-06-25T01:18:36.995786+00:00"},{"alias_kind":"pith_short_16","alias_value":"3MPOGIXL24SUSB5L","created_at":"2026-06-25T01:18:36.995786+00:00"},{"alias_kind":"pith_short_8","alias_value":"3MPOGIXL","created_at":"2026-06-25T01:18:36.995786+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2602.18600","citing_title":"MapTab: Are MLLMs Ready for Multi-Criteria Route Planning in Heterogeneous Graphs?","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18652","citing_title":"MementoGUI: Learning Agentic Multimodal Memory Control for Long-Horizon GUI Agents","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2602.18600","citing_title":"MapTab: Are MLLMs Ready for Multi-Criteria Route Planning in Heterogeneous Graphs?","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07817","citing_title":"GazeVLM: Active Vision via Internal Attention Control for Multimodal Reasoning","ref_index":2,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU","json":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU.json","graph_json":"https://pith.science/api/pith-number/3MPOGIXL24SUSB5LZC22VKILGU/graph.json","events_json":"https://pith.science/api/pith-number/3MPOGIXL24SUSB5LZC22VKILGU/events.json","paper":"https://pith.science/paper/3MPOGIXL"},"agent_actions":{"view_html":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU","download_json":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU.json","view_paper":"https://pith.science/paper/3MPOGIXL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.06566&json=true","fetch_graph":"https://pith.science/api/pith-number/3MPOGIXL24SUSB5LZC22VKILGU/graph.json","fetch_events":"https://pith.science/api/pith-number/3MPOGIXL24SUSB5LZC22VKILGU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU/action/storage_attestation","attest_author":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU/action/author_attestation","sign_citation":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU/action/citation_signature","submit_replication":"https://pith.science/pith/3MPOGIXL24SUSB5LZC22VKILGU/action/replication_record"}},"created_at":"2026-06-25T01:18:36.995786+00:00","updated_at":"2026-06-25T01:18:36.995786+00:00"}