{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:SCF7EQVEK6FTZY5KSXS4Z5R2SK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2aace5a36849de6fde91afc753105b5554fc5a4e04de5e821ea7947722357a4e","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-11-18T18:52:22Z","title_canon_sha256":"40221f17d47a6be85094c05fdf06a5182eca98f2189ae5158aac26e397e62cba"},"schema_version":"1.0","source":{"id":"2511.14751","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.14751","created_at":"2026-05-17T23:39:00Z"},{"alias_kind":"arxiv_version","alias_value":"2511.14751v2","created_at":"2026-05-17T23:39:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.14751","created_at":"2026-05-17T23:39:00Z"},{"alias_kind":"pith_short_12","alias_value":"SCF7EQVEK6FT","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"SCF7EQVEK6FTZY5K","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"SCF7EQVE","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:0932995c224aab80c479a700df2165f8f9c0b9419e387c6a1ae593ba1dc36b56","target":"graph","created_at":"2026-05-17T23:39:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"When applied to VGGT and Pi3, Co-Me achieves up to 21.5x and 20.4x speedup, making visual geometric transformers practical for real-time 3D perception and reconstruction."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That a distilled lightweight confidence predictor can reliably rank tokens by uncertainty in a manner that matches regions emphasized by the transformer, enabling substantial acceleration without degrading performance across multi-view and streaming setups."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Co-Me distills a confidence predictor to selectively merge low-confidence tokens in visual geometric transformers, delivering up to 21.5x speedup on VGGT and 20.4x on Pi3 while preserving spatial coverage and performance."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A distilled confidence predictor ranks and merges low-uncertainty tokens to accelerate visual geometric transformers up to 21 times without retraining."}],"snapshot_sha256":"4930bc7846b2de09a194ff8830bc0a72a1169d406c5444172e4265aad6feba26"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose Confidence-Guided Token Merging (Co-Me), an acceleration mechanism for visual geometric transformers without retraining or finetuning the base model. Co-Me distilled a light-weight confidence predictor to rank tokens by uncertainty and selectively merge low-confidence ones, effectively reducing computation while maintaining spatial coverage. Compared to similarity-based merging or pruning, the confidence signal in Co-Me reliably indicates regions emphasized by the transformer, enabling substantial acceleration without degrading performance. Co-Me applies seamlessly to various multi-","authors_text":"Ali Agha, Jay Patrikar, Ruogu Li, Sebastian Scherer, Shayegan Omidshafiei, Yuheng Qiu, Yutian Chen","cross_cats":["cs.RO"],"headline":"A distilled confidence predictor ranks and merges low-uncertainty tokens to accelerate visual geometric transformers up to 21 times without retraining.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-11-18T18:52:22Z","title":"Co-Me: Confidence-Guided Token Merging for Visual Geometric Transformers"},"references":{"count":44,"internal_anchors":2,"resolved_work":44,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Large-scale data for multiple-view stereopsis.International Journal of Computer Vision, pages 1–16, 2016","work_id":"9704adcc-e422-4599-9c99-4af55ad87af3","year":2016},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Token merging for fast sta- ble diffusion","work_id":"9cc69a3c-fb65-454e-9bce-9b4061b94cf3","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Token merging: Your vit but faster, 2023","work_id":"2324203d-0a9b-4019-a556-d1c919dfe818","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Learning to rank using gradient descent","work_id":"d887c317-abd8-498b-b55b-3518fb99e431","year":2005},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Must3r: Multi-view network for stereo 3d reconstruc- tion, 2025","work_id":"9dc0ed82-1ee5-4402-9ebe-6b4f3b134f3e","year":2025}],"snapshot_sha256":"e941ba9571ed93b244538976c77d18782039c36c97762e105e9e957403bdf1de"},"source":{"id":"2511.14751","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T20:32:47.511581Z","id":"7877cbb3-6a7e-4fcc-85d1-b3d1058155f4","model_set":{"reader":"grok-4.3"},"one_line_summary":"Co-Me distills a confidence predictor to selectively merge low-confidence tokens in visual geometric transformers, delivering up to 21.5x speedup on VGGT and 20.4x on Pi3 while preserving spatial coverage and performance.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A distilled confidence predictor ranks and merges low-uncertainty tokens to accelerate visual geometric transformers up to 21 times without retraining.","strongest_claim":"When applied to VGGT and Pi3, Co-Me achieves up to 21.5x and 20.4x speedup, making visual geometric transformers practical for real-time 3D perception and reconstruction.","weakest_assumption":"That a distilled lightweight confidence predictor can reliably rank tokens by uncertainty in a manner that matches regions emphasized by the transformer, enabling substantial acceleration without degrading performance across multi-view and streaming setups."}},"verdict_id":"7877cbb3-6a7e-4fcc-85d1-b3d1058155f4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ea5ffc72f09e3d635546402eb451b5585b43fd597cc6a19de9389875e972c9f4","target":"record","created_at":"2026-05-17T23:39:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2aace5a36849de6fde91afc753105b5554fc5a4e04de5e821ea7947722357a4e","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-11-18T18:52:22Z","title_canon_sha256":"40221f17d47a6be85094c05fdf06a5182eca98f2189ae5158aac26e397e62cba"},"schema_version":"1.0","source":{"id":"2511.14751","kind":"arxiv","version":2}},"canonical_sha256":"908bf242a4578b3ce3aa95e5ccf63a92a0679976428f6443052c6f4c1669e621","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"908bf242a4578b3ce3aa95e5ccf63a92a0679976428f6443052c6f4c1669e621","first_computed_at":"2026-05-17T23:39:00.734159Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:00.734159Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"uYbZfS30DP5oPCvjLR7r54tOYbiX5IJagJmCBozEP4xFx7hIm/WZ0IAk50CFKBf7SiB9G+NstTPnmdHvQlXNDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:00.734872Z","signed_message":"canonical_sha256_bytes"},"source_id":"2511.14751","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ea5ffc72f09e3d635546402eb451b5585b43fd597cc6a19de9389875e972c9f4","sha256:0932995c224aab80c479a700df2165f8f9c0b9419e387c6a1ae593ba1dc36b56"],"state_sha256":"3c469ad21b38aa9b698a8b2f64d7f00ae4a060677ef074bea331d868b5574201"}