{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:XGXUEWFMHNYXH6JJ7DNSCM7E6H","short_pith_number":"pith:XGXUEWFM","schema_version":"1.0","canonical_sha256":"b9af4258ac3b7173f929f8db2133e4f1dec457f50218d8fe8d85c4b4ff502ede","source":{"kind":"arxiv","id":"2606.10651","version":1},"attestation_state":"computed","paper":{"title":"Kwai Keye-VL-2.0 Technical Report","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bin Wen, Changyi Liu, Chengru Song, Chongling Rao, Chuan Yi, Fan Yang, Feng Han, Guowang Zhang, Haixuan Gao, Hang Li, Han Li, Haonan Fan, Haonan Jia, Hengrui Ju, Jiankang Chen, Jiapeng Chen, Jiawei Yuan, Jinghui Jia, Jing Wang, Junmin Chen, Junyu Shi, Kaixuan Yang, Kaiyu Jiang, Kun Gai, Kwai Keye Team, Lele Yang, Lingzhi Zhou, Mingqiao Liu, Muxi Diao, Na Nie, Qile Su, Qi Zhang, Ruilin Zhang, Sen Na, Tianke Zhang, Tianming Liang, Tingting Gao, Wei Chen, Weixin Xu, Wentao Hong, Xiaoxiao Ma, Xingyu Lu, Xuanyu Zheng, Yancheng Long, Yang Tian, Yankai Yang, Yingxin Li, Yiyang Fan, Yufei Han, Yulong Chen, Yu Xia, Yuzhe Chen, Ziliang Lai","submitted_at":"2026-06-09T09:58:08Z","abstract_excerpt":"We introduce Kwai Keye-VL-2.0-30B-A3B, an open-source Mixture-of-Experts (MoE) multimodal foundation model designed to advance long-video understanding and agentic intelligence. To address the challenges of ultra-long contexts, information redundancy, and prohibitive computational costs inherent in hour-level videos, Keye-VL-2.0 is the first to adapt DeepSeek Sparse Attention (DSA) to GQA-based multimodal architectures, enabling lossless 256K context processing while capturing critical frames and long-range temporal dependencies. This architecture is underpinned by a highly optimized training "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.10651","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-09T09:58:08Z","cross_cats_sorted":[],"title_canon_sha256":"c1d2fd56b876fc5499ca53866fd7009aa37c41ca526a4b3556968ffd560c1b42","abstract_canon_sha256":"405cbd1d7a30b9e0396410a3288a54730ef42aa9856b37973bb96f743dfaf9e1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:10:32.382634Z","signature_b64":"DHJe80PqUXCMWAgIzNrqrVQlb3DPmTp++0CQDak/6lDY1+1bwyvFn1nOFQUlhQhYWE1kCyejOs26UQ/2po2kAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b9af4258ac3b7173f929f8db2133e4f1dec457f50218d8fe8d85c4b4ff502ede","last_reissued_at":"2026-06-10T01:10:32.381715Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:10:32.381715Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Kwai Keye-VL-2.0 Technical Report","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bin Wen, Changyi Liu, Chengru Song, Chongling Rao, Chuan Yi, Fan Yang, Feng Han, Guowang Zhang, Haixuan Gao, Hang Li, Han Li, Haonan Fan, Haonan Jia, Hengrui Ju, Jiankang Chen, Jiapeng Chen, Jiawei Yuan, Jinghui Jia, Jing Wang, Junmin Chen, Junyu Shi, Kaixuan Yang, Kaiyu Jiang, Kun Gai, Kwai Keye Team, Lele Yang, Lingzhi Zhou, Mingqiao Liu, Muxi Diao, Na Nie, Qile Su, Qi Zhang, Ruilin Zhang, Sen Na, Tianke Zhang, Tianming Liang, Tingting Gao, Wei Chen, Weixin Xu, Wentao Hong, Xiaoxiao Ma, Xingyu Lu, Xuanyu Zheng, Yancheng Long, Yang Tian, Yankai Yang, Yingxin Li, Yiyang Fan, Yufei Han, Yulong Chen, Yu Xia, Yuzhe Chen, Ziliang Lai","submitted_at":"2026-06-09T09:58:08Z","abstract_excerpt":"We introduce Kwai Keye-VL-2.0-30B-A3B, an open-source Mixture-of-Experts (MoE) multimodal foundation model designed to advance long-video understanding and agentic intelligence. To address the challenges of ultra-long contexts, information redundancy, and prohibitive computational costs inherent in hour-level videos, Keye-VL-2.0 is the first to adapt DeepSeek Sparse Attention (DSA) to GQA-based multimodal architectures, enabling lossless 256K context processing while capturing critical frames and long-range temporal dependencies. This architecture is underpinned by a highly optimized training "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.10651","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.10651/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.10651","created_at":"2026-06-10T01:10:32.381880+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.10651v1","created_at":"2026-06-10T01:10:32.381880+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.10651","created_at":"2026-06-10T01:10:32.381880+00:00"},{"alias_kind":"pith_short_12","alias_value":"XGXUEWFMHNYX","created_at":"2026-06-10T01:10:32.381880+00:00"},{"alias_kind":"pith_short_16","alias_value":"XGXUEWFMHNYXH6JJ","created_at":"2026-06-10T01:10:32.381880+00:00"},{"alias_kind":"pith_short_8","alias_value":"XGXUEWFM","created_at":"2026-06-10T01:10:32.381880+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H","json":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H.json","graph_json":"https://pith.science/api/pith-number/XGXUEWFMHNYXH6JJ7DNSCM7E6H/graph.json","events_json":"https://pith.science/api/pith-number/XGXUEWFMHNYXH6JJ7DNSCM7E6H/events.json","paper":"https://pith.science/paper/XGXUEWFM"},"agent_actions":{"view_html":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H","download_json":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H.json","view_paper":"https://pith.science/paper/XGXUEWFM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.10651&json=true","fetch_graph":"https://pith.science/api/pith-number/XGXUEWFMHNYXH6JJ7DNSCM7E6H/graph.json","fetch_events":"https://pith.science/api/pith-number/XGXUEWFMHNYXH6JJ7DNSCM7E6H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H/action/storage_attestation","attest_author":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H/action/author_attestation","sign_citation":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H/action/citation_signature","submit_replication":"https://pith.science/pith/XGXUEWFMHNYXH6JJ7DNSCM7E6H/action/replication_record"}},"created_at":"2026-06-10T01:10:32.381880+00:00","updated_at":"2026-06-10T01:10:32.381880+00:00"}