{"work":{"id":"ef2b3279-e78f-44b7-abab-ed7d011dc1cf","openalex_id":null,"doi":null,"arxiv_id":"2312.16886","raw_key":null,"title":"MobileVLM : A Fast, Strong and Open Vision Language Assistant for Mobile Devices","authors":null,"authors_text":"Xiangxiang Chu, Limeng Qiao, Xinyang Lin, Shuang Xu, Yang Yang, Yiming Hu","year":2023,"venue":"cs.CV","abstract":"We present MobileVLM, a competent multimodal vision language model (MMVLM) targeted to run on mobile devices. It is an amalgamation of a myriad of architectural designs and techniques that are mobile-oriented, which comprises a set of language models at the scale of 1.4B and 2.7B parameters, trained from scratch, a multimodal vision model that is pre-trained in the CLIP fashion, cross-modality interaction via an efficient projector. We evaluate MobileVLM on several typical VLM benchmarks. Our models demonstrate on par performance compared with a few much larger models. More importantly, we measure the inference speed on both a Qualcomm Snapdragon 888 CPU and an NVIDIA Jetson Orin GPU, and we obtain state-of-the-art performance of 21.5 tokens and 65.3 tokens per second, respectively. \nOur code will be made available at: https://github.com/Meituan-AutoML/MobileVLM.","external_url":"https://arxiv.org/abs/2312.16886","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-18T20:21:49.920952+00:00","pith_arxiv_id":"2312.16886","created_at":"2026-05-10T11:25:18.592836+00:00","updated_at":"2026-05-18T20:21:49.920952+00:00","title_quality_ok":true,"display_title":"MobileVLM : A Fast, Strong and Open Vision Language Assistant for Mobile Devices","render_title":"MobileVLM : A Fast, Strong and Open Vision Language Assistant for Mobile Devices"},"hub":{"state":{"work_id":"ef2b3279-e78f-44b7-abab-ed7d011dc1cf","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":22,"external_cited_by_count":null,"distinct_field_count":5,"first_pith_cited_at":"2023-06-23T15:21:52+00:00","last_pith_cited_at":"2026-05-11T14:28:44+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-25T04:35:26.946594+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":8},{"context_role":"baseline","n":3}],"polarity_counts":[{"context_polarity":"background","n":7},{"context_polarity":"baseline","n":3},{"context_polarity":"unclear","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}