{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:YEK2EB3UZILCNPFUT75SKOASPQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e26b739eaab3a336fdbef524f8c1175a46f3aac3cfe83c64a58b41e5c4e16c7a","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-01-23T16:51:47Z","title_canon_sha256":"d04a3d1b3579b429fd81cd2b06c42dc5c53e786c742b441ef725394c06e527a3"},"schema_version":"1.0","source":{"id":"2501.13826","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.13826","created_at":"2026-05-18T03:19:23Z"},{"alias_kind":"arxiv_version","alias_value":"2501.13826v1","created_at":"2026-05-18T03:19:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.13826","created_at":"2026-05-18T03:19:23Z"},{"alias_kind":"pith_short_12","alias_value":"YEK2EB3UZILC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"YEK2EB3UZILCNPFU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"YEK2EB3U","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:0572df769c0e811382c23794f957e72c079f4c90c354396bd4a69838d28508ba","target":"graph","created_at":"2026-05-18T03:19:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Evaluation of LMMs reveals a steep decline in performance as cognitive demands increase and highlights a significant gap between human and model knowledge acquisition, underscoring the need for methods to enhance LMMs' capability to learn and adapt from videos."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the 300 videos and 900 human-annotated questions accurately and unbiasedly capture the three cognitive stages of knowledge acquisition without selection or annotation artifacts affecting the measured gaps."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Video-MMMU benchmark shows large multimodal models exhibit steep performance drops on higher cognitive tasks when learning from professional videos and lag significantly behind humans in knowledge acquisition."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Video-MMMU benchmark shows large multimodal models decline sharply in performance as video tasks require more cognitive adaptation."}],"snapshot_sha256":"a19b6db4967651cc8c9efef686b380bf0ced55a794e317c72d1007133b445769"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"a2c7abe510327495ce8d16c5310ff305c8b3b59ccdd3bce6704668fcfcdc9d17"},"paper":{"abstract_excerpt":"Humans acquire knowledge through three cognitive stages: perceiving information, comprehending knowledge, and adapting knowledge to solve novel problems. Videos serve as an effective medium for this learning process, facilitating a progression through these cognitive stages. However, existing video benchmarks fail to systematically evaluate the knowledge acquisition capabilities in Large Multimodal Models (LMMs). To address this gap, we introduce Video-MMMU, a multi-modal, multi-disciplinary benchmark designed to assess LMMs' ability to acquire and utilize knowledge from videos. Video-MMMU fea","authors_text":"Bo Li, Fanyi Pu, Kairui Hu, Penghao Wu, Wang Xiao, Xiang Yue, Yuanhan Zhang, Ziwei Liu","cross_cats":["cs.CL"],"headline":"Video-MMMU benchmark shows large multimodal models decline sharply in performance as video tasks require more cognitive adaptation.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-01-23T16:51:47Z","title":"Video-MMMU: Evaluating Knowledge Acquisition from Multi-Discipline Professional Videos"},"references":{"count":62,"internal_anchors":14,"resolved_work":62,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Anthropic. Claude Team. Introducing Claude 3.5 Sonnet. https://www.anthropic.com/claude/sonnet ,","work_id":"440e81fa-cd1f-404b-8d37-c4d8c28910e4","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"A systematic classification of knowl- edge, reasoning, and context within the ARC dataset","work_id":"f14b7cda-005a-43d5-8a6f-8cfeaeac4480","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Temporalbench: Towards fine-grained temporal understanding for multimodal video models","work_id":"0f787776-4151-41f7-8d50-6174eb1340d3","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Auroracap: Efficient, performant video detailed captioning and a new benchmark","work_id":"3c75ba15-49f8-4ea3-87a8-6c357f825176","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Autoeval-video: An automatic benchmark for assessing large vision language models in open-ended video question answer- ing","work_id":"a448c544-28f6-42bc-a57f-0f0e879ea3b2","year":2023}],"snapshot_sha256":"2005f98eaf47fc1226002149ae12f04ca5952540dcbcdf5e76223f3363541d2e"},"source":{"id":"2501.13826","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T00:27:45.966463Z","id":"9beedc01-02fe-4c6d-96d7-57f99f0ea6c7","model_set":{"reader":"grok-4.3"},"one_line_summary":"Video-MMMU benchmark shows large multimodal models exhibit steep performance drops on higher cognitive tasks when learning from professional videos and lag significantly behind humans in knowledge acquisition.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Video-MMMU benchmark shows large multimodal models decline sharply in performance as video tasks require more cognitive adaptation.","strongest_claim":"Evaluation of LMMs reveals a steep decline in performance as cognitive demands increase and highlights a significant gap between human and model knowledge acquisition, underscoring the need for methods to enhance LMMs' capability to learn and adapt from videos.","weakest_assumption":"That the 300 videos and 900 human-annotated questions accurately and unbiasedly capture the three cognitive stages of knowledge acquisition without selection or annotation artifacts affecting the measured gaps."}},"verdict_id":"9beedc01-02fe-4c6d-96d7-57f99f0ea6c7"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:510a4fbbad197f2686de0f5e35c8e3c9c3b2c1e04d5767bf746a1d83ddb69f6c","target":"record","created_at":"2026-05-18T03:19:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e26b739eaab3a336fdbef524f8c1175a46f3aac3cfe83c64a58b41e5c4e16c7a","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-01-23T16:51:47Z","title_canon_sha256":"d04a3d1b3579b429fd81cd2b06c42dc5c53e786c742b441ef725394c06e527a3"},"schema_version":"1.0","source":{"id":"2501.13826","kind":"arxiv","version":1}},"canonical_sha256":"c115a20774ca1626bcb49ffb2538127c01322595c20ebd0fdb3baf0c12ded52d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c115a20774ca1626bcb49ffb2538127c01322595c20ebd0fdb3baf0c12ded52d","first_computed_at":"2026-05-18T03:19:23.485360Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:19:23.485360Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fzRjTtg2DAHONF5N0v2tha90jX2a1qjgvWuprSiYRAINJIzZSneCLX869nRay7EXsA+9RQrobqkeliioRAVRAw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:19:23.485951Z","signed_message":"canonical_sha256_bytes"},"source_id":"2501.13826","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:510a4fbbad197f2686de0f5e35c8e3c9c3b2c1e04d5767bf746a1d83ddb69f6c","sha256:0572df769c0e811382c23794f957e72c079f4c90c354396bd4a69838d28508ba"],"state_sha256":"13bf7924ec06d9bdfc455917049bd02fe9ad18501fc2b3328c2fe7af9b0c4ebc"}