{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZI3N2TSVW7EDBHSIGDGLSLIKMK","short_pith_number":"pith:ZI3N2TSV","schema_version":"1.0","canonical_sha256":"ca36dd4e55b7c8309e4830ccb92d0a62b2430e457993fe6c840f6eca8f73293b","source":{"kind":"arxiv","id":"2605.29535","version":1},"attestation_state":"computed","paper":{"title":"AsymVLM: Asymmetric Token Pruning for Efficient Vision-Language Model Inference","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Ahmed Burak Gulhan, Mahmut Taylan Kandemir, Yilin Feng","submitted_at":"2026-05-28T07:49:45Z","abstract_excerpt":"Vision-Language Models (VLMs) process thousands of visual tokens per image alongside comparatively few text tokens, yet existing compression methods treat both modalities uniformly. We observe that the two modalities have fundamentally different properties: vision tokens are spatially redundant and dominate prefill, while text tokens are causally dependent and accumulate during decoding. Based on this asymmetry, we propose and empirically evaluate AsymVLM, which applies aggressive pruning to vision tokens before prefill using a learned importance scorer with per-sample adaptive budgeting, and "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.29535","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-28T07:49:45Z","cross_cats_sorted":[],"title_canon_sha256":"b30fbd1de93510b5de84410488f6ca36388ede5d96ba7a10993c6509181e6f14","abstract_canon_sha256":"bb273d869718c71016b0757af6363df8d5555ed346a228f4724670f87410adf7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:45.783034Z","signature_b64":"dvYlKvWuwT7BHqMi+qWk1z1Fo+XlzOonlaGMaRFCFSuxVXYA3E6VtzO6k0Kyvvr/7tam/WuUD+P3HERNxVWyAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ca36dd4e55b7c8309e4830ccb92d0a62b2430e457993fe6c840f6eca8f73293b","last_reissued_at":"2026-05-29T01:05:45.782305Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:45.782305Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"AsymVLM: Asymmetric Token Pruning for Efficient Vision-Language Model Inference","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Ahmed Burak Gulhan, Mahmut Taylan Kandemir, Yilin Feng","submitted_at":"2026-05-28T07:49:45Z","abstract_excerpt":"Vision-Language Models (VLMs) process thousands of visual tokens per image alongside comparatively few text tokens, yet existing compression methods treat both modalities uniformly. We observe that the two modalities have fundamentally different properties: vision tokens are spatially redundant and dominate prefill, while text tokens are causally dependent and accumulate during decoding. Based on this asymmetry, we propose and empirically evaluate AsymVLM, which applies aggressive pruning to vision tokens before prefill using a learned importance scorer with per-sample adaptive budgeting, and "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29535","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.29535/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.29535","created_at":"2026-05-29T01:05:45.782431+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.29535v1","created_at":"2026-05-29T01:05:45.782431+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29535","created_at":"2026-05-29T01:05:45.782431+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZI3N2TSVW7ED","created_at":"2026-05-29T01:05:45.782431+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZI3N2TSVW7EDBHSI","created_at":"2026-05-29T01:05:45.782431+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZI3N2TSV","created_at":"2026-05-29T01:05:45.782431+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK","json":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK.json","graph_json":"https://pith.science/api/pith-number/ZI3N2TSVW7EDBHSIGDGLSLIKMK/graph.json","events_json":"https://pith.science/api/pith-number/ZI3N2TSVW7EDBHSIGDGLSLIKMK/events.json","paper":"https://pith.science/paper/ZI3N2TSV"},"agent_actions":{"view_html":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK","download_json":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK.json","view_paper":"https://pith.science/paper/ZI3N2TSV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.29535&json=true","fetch_graph":"https://pith.science/api/pith-number/ZI3N2TSVW7EDBHSIGDGLSLIKMK/graph.json","fetch_events":"https://pith.science/api/pith-number/ZI3N2TSVW7EDBHSIGDGLSLIKMK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK/action/storage_attestation","attest_author":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK/action/author_attestation","sign_citation":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK/action/citation_signature","submit_replication":"https://pith.science/pith/ZI3N2TSVW7EDBHSIGDGLSLIKMK/action/replication_record"}},"created_at":"2026-05-29T01:05:45.782431+00:00","updated_at":"2026-05-29T01:05:45.782431+00:00"}