{"work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","graph":{"co_cited":[{"title":"Qwen2.5-VL Technical Report","work_id":"69dffacb-bfe8-442d-be86-48624c60426f","shared_citers":100},{"title":"InternVL3.5: Advancing Open-Source Multimodal Models in Versatility, Reasoning, and Efficiency","work_id":"b8f5e260-fff5-444e-bcf5-2c42cfefd83d","shared_citers":90},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":72},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":62},{"title":"Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution","work_id":"8abcfe4f-e0fb-44b7-9123-448fac95f90a","shared_citers":62},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":56},{"title":"InternVL3: Exploring Advanced Training and Test-Time Recipes for Open-Source Multimodal Models","work_id":"fe8637aa-12bc-4434-8d36-9f57b5eebcbe","shared_citers":54},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":51},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":48},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":47},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":45},{"title":"LLaVA-OneVision: Easy Visual Task Transfer","work_id":"f5f2452b-f2a9-49ac-b38d-c76e18cdfe49","shared_citers":44},{"title":"OpenAI GPT-5 System Card","work_id":"ca87689a-0d29-4476-b504-b65dbbb08af4","shared_citers":44},{"title":"Wan: Open and Advanced Large-Scale Video Generative Models","work_id":"ad3ebc3b-4224-46c9-b61d-bcf135da0a7c","shared_citers":40},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":31},{"title":"Qwen-Image Technical Report","work_id":"d06d7ecc-7579-4f89-a60b-4278a0f3c562","shared_citers":27},{"title":"Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond","work_id":"cbc2bb21-b6bb-46c0-80bf-107e195ffe10","shared_citers":26},{"title":"SigLIP 2: Multilingual Vision-Language Encoders with Improved Semantic Understanding, Localization, and Dense Features","work_id":"50eec732-2d41-432f-9dcf-ac7fff235ea5","shared_citers":25},{"title":"Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling","work_id":"ee70bdc8-4656-4849-ada7-ce42a2278d70","shared_citers":24},{"title":"GLM-4.5V and GLM-4.1V-Thinking: Towards Versatile Multimodal Reasoning with Scalable Reinforcement Learning","work_id":"366607ba-e4ea-4726-98c3-63356e32351c","shared_citers":24},{"title":"OpenVLA: An Open-Source Vision-Language-Action Model","work_id":"3e7e65c5-5aed-4fe9-8414-2092bcb31cc7","shared_citers":23},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":22},{"title":"DINOv2: Learning Robust Visual Features without Supervision","work_id":"26b304e5-b54a-4f26-be7e-83299eca52e4","shared_citers":22},{"title":"Kimi-VL Technical Report","work_id":"c876520f-8a20-44f3-b92a-bf7d35bd430f","shared_citers":22}],"time_series":[{"n":394,"year":2026}]}}