{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:IL2GFSKZ2657G56SVJAYSIRIN6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8ea710614dbdefbfee1caca2d0d10902ea897a7deb524d9d333cb07dccfc7a7e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-03-01T12:02:19Z","title_canon_sha256":"5c3fcd68707f7e1fe9fdbfdc44467dac5589f4b0191d98f5a0d2c1e1882aaf32"},"schema_version":"1.0","source":{"id":"2403.00476","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2403.00476","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"arxiv_version","alias_value":"2403.00476v3","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2403.00476","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"pith_short_12","alias_value":"IL2GFSKZ2657","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"IL2GFSKZ2657G56S","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"IL2GFSKZ","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:4bd91807a8cdc6486e2cc2210a4493a8aaf7066881345101ef8927e8453eb8ab","target":"graph","created_at":"2026-05-17T23:38:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Based on TempCompass, these models exhibit notably poor temporal perception ability."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the constructed conflicting videos successfully isolate specific temporal aspects without introducing unintended biases or allowing models to exploit other cues, and that the LLM-based automatic evaluation accurately reflects model performance."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"TempCompass benchmark reveals that state-of-the-art Video LLMs have poor ability to perceive temporal aspects such as speed, direction, and ordering in videos."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Video LLMs exhibit notably poor temporal perception ability across aspects like speed and direction."}],"snapshot_sha256":"3a5dd8c75e0dc8b8d918ad27b7c2ab1ec77fef06481965d0222b403368d63d5d"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"c074fe3136d412d21fb68ac13e935fa92aac864a349e25027700ccfdc7ad3862"},"paper":{"abstract_excerpt":"Recently, there is a surge in interest surrounding video large language models (Video LLMs). However, existing benchmarks fail to provide a comprehensive feedback on the temporal perception ability of Video LLMs. On the one hand, most of them are unable to distinguish between different temporal aspects (e.g., speed, direction) and thus cannot reflect the nuanced performance on these specific aspects. On the other hand, they are limited in the diversity of task formats (e.g., only multi-choice QA), which hinders the understanding of how temporal perception performance may vary across different ","authors_text":"Lei Li, Lu Hou, Shicheng Li, Shuhuai Ren, Sishuo Chen, Xu Sun, Yi Liu, Yuanxin Liu, Yuxiang Wang","cross_cats":[],"headline":"Video LLMs exhibit notably poor temporal perception ability across aspects like speed and direction.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-03-01T12:02:19Z","title":"TempCompass: Do Video LLMs Really Understand Videos?"},"references":{"count":135,"internal_anchors":27,"resolved_work":135,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"LLaMA: Open and Efficient Foundation Language Models , author=. ArXiv , year=","work_id":"8a8b63b4-c22e-413d-88f5-8753fc5f8402","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models , author=. ArXiv , year=","work_id":"abef9ec9-cc35-48c6-968c-28f788c4162f","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"and Stoica, Ion and Xing, Eric P","work_id":"cb4b41f6-6d60-4db4-a4d1-6c5bb7899473","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Hashimoto , year =","work_id":"59352350-df66-4d75-a005-0d0cb02e8ccf","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwa","work_id":"15cd97b7-6e24-48b5-b218-f433fead09cd","year":null}],"snapshot_sha256":"df3ccff164fbbda1d89bd03332bafc7f276da5e41436d1c51549a7153ca1cbb7"},"source":{"id":"2403.00476","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-17T02:40:35.903339Z","id":"94b6c738-6e1f-48d5-a67c-553b68b85c0b","model_set":{"reader":"grok-4.3"},"one_line_summary":"TempCompass benchmark reveals that state-of-the-art Video LLMs have poor ability to perceive temporal aspects such as speed, direction, and ordering in videos.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Video LLMs exhibit notably poor temporal perception ability across aspects like speed and direction.","strongest_claim":"Based on TempCompass, these models exhibit notably poor temporal perception ability.","weakest_assumption":"That the constructed conflicting videos successfully isolate specific temporal aspects without introducing unintended biases or allowing models to exploit other cues, and that the LLM-based automatic evaluation accurately reflects model performance."}},"verdict_id":"94b6c738-6e1f-48d5-a67c-553b68b85c0b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:668fba1c5ac5a4fe6ae95de2cf0b7a1390b66344b2319292bdb9e77e2e080655","target":"record","created_at":"2026-05-17T23:38:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8ea710614dbdefbfee1caca2d0d10902ea897a7deb524d9d333cb07dccfc7a7e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-03-01T12:02:19Z","title_canon_sha256":"5c3fcd68707f7e1fe9fdbfdc44467dac5589f4b0191d98f5a0d2c1e1882aaf32"},"schema_version":"1.0","source":{"id":"2403.00476","kind":"arxiv","version":3}},"canonical_sha256":"42f462c959d7bbf377d2aa418922286f88f1771da0803e0dec95e98189ceb55c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"42f462c959d7bbf377d2aa418922286f88f1771da0803e0dec95e98189ceb55c","first_computed_at":"2026-05-17T23:38:15.358530Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:15.358530Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/7vFKF9S1zRNxeci0VwOKoTq/mXN9WuoWuZnOR/GHjN/t/13rq5J0zpGpt+QBOglzdDSqUtG71V72f6ua0eEAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:15.359178Z","signed_message":"canonical_sha256_bytes"},"source_id":"2403.00476","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:668fba1c5ac5a4fe6ae95de2cf0b7a1390b66344b2319292bdb9e77e2e080655","sha256:4bd91807a8cdc6486e2cc2210a4493a8aaf7066881345101ef8927e8453eb8ab"],"state_sha256":"ef3ca962f3e486ad7c916e24fce4ca68e51afe9973ea1513ba51fa86f41fcd62"}