{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:Z4MCSTT22SONSD5RF63XZFU5KR","short_pith_number":"pith:Z4MCSTT2","canonical_record":{"source":{"id":"2501.00574","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-12-31T18:01:23Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"395798686d9037a5397018f4151a520f679975b72a99a4fc23dd0edb5a7b4645","abstract_canon_sha256":"e23fd1e8d2c18894178e01e3eb53572b53f36dd63b4b9b3d47db628c7444ceac"},"schema_version":"1.0"},"canonical_sha256":"cf18294e7ad49cd90fb12fb77c969d546152e6907395541ea9180978e50975c1","source":{"kind":"arxiv","id":"2501.00574","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.00574","created_at":"2026-05-18T03:57:11Z"},{"alias_kind":"arxiv_version","alias_value":"2501.00574v4","created_at":"2026-05-18T03:57:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.00574","created_at":"2026-05-18T03:57:11Z"},{"alias_kind":"pith_short_12","alias_value":"Z4MCSTT22SON","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Z4MCSTT22SONSD5R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Z4MCSTT2","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:Z4MCSTT22SONSD5RF63XZFU5KR","target":"record","payload":{"canonical_record":{"source":{"id":"2501.00574","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-12-31T18:01:23Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"395798686d9037a5397018f4151a520f679975b72a99a4fc23dd0edb5a7b4645","abstract_canon_sha256":"e23fd1e8d2c18894178e01e3eb53572b53f36dd63b4b9b3d47db628c7444ceac"},"schema_version":"1.0"},"canonical_sha256":"cf18294e7ad49cd90fb12fb77c969d546152e6907395541ea9180978e50975c1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:57:11.983559Z","signature_b64":"FhtiqBD0gLrc8sIVhbED62mwAeH37tI/oLJ/eJMrh+Qm+BjQGPYdTEX2Z/Xs4Vq9AWcVDW8jFifkCRCPw/h+Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cf18294e7ad49cd90fb12fb77c969d546152e6907395541ea9180978e50975c1","last_reissued_at":"2026-05-18T03:57:11.983097Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:57:11.983097Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2501.00574","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:57:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mPD25PvUCmfX8M21Ta6V/NWYEYiUkU7YrMUK3KYjozwOS8UJql0tXd7tYi3dpl+y4i0+mx3/eRKNTHfGsnm4AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T20:40:50.269787Z"},"content_sha256":"536ec9d294396f369083172ebf0252a8f5c2941c8092a62ca687231113fcd91e","schema_version":"1.0","event_id":"sha256:536ec9d294396f369083172ebf0252a8f5c2941c8092a62ca687231113fcd91e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:Z4MCSTT22SONSD5RF63XZFU5KR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VideoChat-Flash: Hierarchical Compression for Long-Context Video Modeling","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CV","authors_text":"Chenting Wang, Haian Huang, Jianfei Gao, Jiashuo Yu, Kunchang Li, Limin Wang, Xiangyu Zeng, Xinhao Li, Yali Wang, Yinan He, Yi Wang, Yuhan Zhu, Yu Qiao","submitted_at":"2024-12-31T18:01:23Z","abstract_excerpt":"Long-context video modeling is critical for multimodal large language models (MLLMs), enabling them to process movies, online video streams, and so on. Despite its advances, handling long videos remains challenging due to the difficulty in efficiently understanding the extremely long video context. This paper aims to address this issue from aspects of model architecture, training data, training strategy and evaluation benchmark. First, we propose a novel Hierarchical video token Compression (HiCo) method, which leverages visual redundancy in long videos to compress long video context from Clip"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.00574","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:57:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ql/VUfCVhLUp76fsW1HCLQfYJR4GMCeZRlNEqW5yEz59RUkb5luJ6FmJAwVOLBR36vMd7JudtrE1tQ/NVUgvBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T20:40:50.270338Z"},"content_sha256":"bdda74ecfd294b03d26dc67d6beb474d6942d0338544e003c96153f4a263d479","schema_version":"1.0","event_id":"sha256:bdda74ecfd294b03d26dc67d6beb474d6942d0338544e003c96153f4a263d479"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Z4MCSTT22SONSD5RF63XZFU5KR/bundle.json","state_url":"https://pith.science/pith/Z4MCSTT22SONSD5RF63XZFU5KR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Z4MCSTT22SONSD5RF63XZFU5KR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-18T20:40:50Z","links":{"resolver":"https://pith.science/pith/Z4MCSTT22SONSD5RF63XZFU5KR","bundle":"https://pith.science/pith/Z4MCSTT22SONSD5RF63XZFU5KR/bundle.json","state":"https://pith.science/pith/Z4MCSTT22SONSD5RF63XZFU5KR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Z4MCSTT22SONSD5RF63XZFU5KR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:Z4MCSTT22SONSD5RF63XZFU5KR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e23fd1e8d2c18894178e01e3eb53572b53f36dd63b4b9b3d47db628c7444ceac","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-12-31T18:01:23Z","title_canon_sha256":"395798686d9037a5397018f4151a520f679975b72a99a4fc23dd0edb5a7b4645"},"schema_version":"1.0","source":{"id":"2501.00574","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.00574","created_at":"2026-05-18T03:57:11Z"},{"alias_kind":"arxiv_version","alias_value":"2501.00574v4","created_at":"2026-05-18T03:57:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.00574","created_at":"2026-05-18T03:57:11Z"},{"alias_kind":"pith_short_12","alias_value":"Z4MCSTT22SON","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Z4MCSTT22SONSD5R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Z4MCSTT2","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:bdda74ecfd294b03d26dc67d6beb474d6942d0338544e003c96153f4a263d479","target":"graph","created_at":"2026-05-18T03:57:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Long-context video modeling is critical for multimodal large language models (MLLMs), enabling them to process movies, online video streams, and so on. Despite its advances, handling long videos remains challenging due to the difficulty in efficiently understanding the extremely long video context. This paper aims to address this issue from aspects of model architecture, training data, training strategy and evaluation benchmark. First, we propose a novel Hierarchical video token Compression (HiCo) method, which leverages visual redundancy in long videos to compress long video context from Clip","authors_text":"Chenting Wang, Haian Huang, Jianfei Gao, Jiashuo Yu, Kunchang Li, Limin Wang, Xiangyu Zeng, Xinhao Li, Yali Wang, Yinan He, Yi Wang, Yuhan Zhu, Yu Qiao","cross_cats":["cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-12-31T18:01:23Z","title":"VideoChat-Flash: Hierarchical Compression for Long-Context Video Modeling"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.00574","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:536ec9d294396f369083172ebf0252a8f5c2941c8092a62ca687231113fcd91e","target":"record","created_at":"2026-05-18T03:57:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e23fd1e8d2c18894178e01e3eb53572b53f36dd63b4b9b3d47db628c7444ceac","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-12-31T18:01:23Z","title_canon_sha256":"395798686d9037a5397018f4151a520f679975b72a99a4fc23dd0edb5a7b4645"},"schema_version":"1.0","source":{"id":"2501.00574","kind":"arxiv","version":4}},"canonical_sha256":"cf18294e7ad49cd90fb12fb77c969d546152e6907395541ea9180978e50975c1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cf18294e7ad49cd90fb12fb77c969d546152e6907395541ea9180978e50975c1","first_computed_at":"2026-05-18T03:57:11.983097Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:57:11.983097Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FhtiqBD0gLrc8sIVhbED62mwAeH37tI/oLJ/eJMrh+Qm+BjQGPYdTEX2Z/Xs4Vq9AWcVDW8jFifkCRCPw/h+Ag==","signature_status":"signed_v1","signed_at":"2026-05-18T03:57:11.983559Z","signed_message":"canonical_sha256_bytes"},"source_id":"2501.00574","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:536ec9d294396f369083172ebf0252a8f5c2941c8092a62ca687231113fcd91e","sha256:bdda74ecfd294b03d26dc67d6beb474d6942d0338544e003c96153f4a263d479"],"state_sha256":"0ca51a86a2b82f140d97e4daa49e75983098fef52ec930fd19b00c2d8d076548"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"apE0E7UE6D6i16WF+VdsTSp2OhyzYt2EtU3r5ayeRSZLv5pgbzSo1TrMIam874H0PuNlWWas++t4OCTwDPUnDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-18T20:40:50.272946Z","bundle_sha256":"673523090d8e393c2a2066b69b58444313348f6b00844590a24dedd8c332826a"}}