{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2022:U52T37GUSXOPBDROGA4KV2MDWC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6dbcccf4bb7c02fbfe9928bc7b713e502af96cf6459bfa583f91f5be25e07262","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-11-23T18:58:39Z","title_canon_sha256":"c6faa78873c360f2d65fa170a921710b4b4f23535ada711afe37d86bc2dc53c3"},"schema_version":"1.0","source":{"id":"2211.13221","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2211.13221","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"arxiv_version","alias_value":"2211.13221v2","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2211.13221","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"pith_short_12","alias_value":"U52T37GUSXOP","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"U52T37GUSXOPBDRO","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"U52T37GU","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:da3660c75f9bf0099d65a6e506095f4fa3830da43ac4a4a38d80dc428a8e5564","target":"graph","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we introduce lightweight video diffusion models by leveraging a low-dimensional 3D latent space, significantly outperforming previous pixel-space video diffusion models under a limited computational budget... hierarchical diffusion in the latent space such that longer videos with more than one thousand frames can be produced... conditional latent perturbation and unconditional guidance that effectively mitigate the accumulated errors during the extension of video length."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The low-dimensional 3D latent space preserves sufficient spatial-temporal detail for high-fidelity generation, and the added perturbation and guidance steps prevent error accumulation without introducing new artifacts or inconsistencies."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Latent-space hierarchical diffusion models with targeted error-correction techniques generate realistic videos exceeding 1000 frames while using less compute than prior pixel-space approaches."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Video diffusion models shift to a low-dimensional 3D latent space to generate realistic clips longer than 1000 frames with modest compute."}],"snapshot_sha256":"414298ca8caf73e6cf6215dd13e84fccb4e6b26da7f47dfa0449112068a1dd6e"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"ec97c6e8e91c6e42965c4e19c01b8eb6256e69265d6c3a187c03456ff8c7e8fe"},"paper":{"abstract_excerpt":"AI-generated content has attracted lots of attention recently, but photo-realistic video synthesis is still challenging. Although many attempts using GANs and autoregressive models have been made in this area, the visual quality and length of generated videos are far from satisfactory. Diffusion models have shown remarkable results recently but require significant computational resources. To address this, we introduce lightweight video diffusion models by leveraging a low-dimensional 3D latent space, significantly outperforming previous pixel-space video diffusion models under a limited comput","authors_text":"Qifeng Chen, Tianyu Yang, Yingqing He, Ying Shan, Yong Zhang","cross_cats":["cs.AI"],"headline":"Video diffusion models shift to a low-dimensional 3D latent space to generate realistic clips longer than 1000 frames with modest compute.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-11-23T18:58:39Z","title":"Latent Video Diffusion Models for High-Fidelity Long Video Generation"},"references":{"count":48,"internal_anchors":15,"resolved_work":48,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Large scale GAN training for high ﬁdelity natural image synthesis","work_id":"af262cdc-3bc5-47cd-8871-360f893535c0","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Generating long videos of dynamic scenes","work_id":"443f47a8-d87d-4758-80b8-84e5a8c0f8b4","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Hier- archical video generation for complex data","work_id":"70163995-e878-4b3a-a64e-14bf55c851be","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Diffusion models beat gans on image synthesis","work_id":"9f6d98a1-8a67-4c73-9285-5883f9f33a56","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Taming transformers for high-resolution image synthesis","work_id":"79ce61b1-69be-4667-80fd-eb5b40b6dcb4","year":2021}],"snapshot_sha256":"0997c4fba9953ccdfbff9df324b6b9da495f3c5957905ffa6613ee8e2beba715"},"source":{"id":"2211.13221","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T04:23:42.819005Z","id":"aa27081a-e0f9-485f-ada9-264af80c1216","model_set":{"reader":"grok-4.3"},"one_line_summary":"Latent-space hierarchical diffusion models with targeted error-correction techniques generate realistic videos exceeding 1000 frames while using less compute than prior pixel-space approaches.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Video diffusion models shift to a low-dimensional 3D latent space to generate realistic clips longer than 1000 frames with modest compute.","strongest_claim":"we introduce lightweight video diffusion models by leveraging a low-dimensional 3D latent space, significantly outperforming previous pixel-space video diffusion models under a limited computational budget... hierarchical diffusion in the latent space such that longer videos with more than one thousand frames can be produced... conditional latent perturbation and unconditional guidance that effectively mitigate the accumulated errors during the extension of video length.","weakest_assumption":"The low-dimensional 3D latent space preserves sufficient spatial-temporal detail for high-fidelity generation, and the added perturbation and guidance steps prevent error accumulation without introducing new artifacts or inconsistencies."}},"verdict_id":"aa27081a-e0f9-485f-ada9-264af80c1216"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c74a87d1b527b6dfc072c4ca5ea628849817f812f12c3e2a32aa39306055f223","target":"record","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6dbcccf4bb7c02fbfe9928bc7b713e502af96cf6459bfa583f91f5be25e07262","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-11-23T18:58:39Z","title_canon_sha256":"c6faa78873c360f2d65fa170a921710b4b4f23535ada711afe37d86bc2dc53c3"},"schema_version":"1.0","source":{"id":"2211.13221","kind":"arxiv","version":2}},"canonical_sha256":"a7753dfcd495dcf08e2e3038aae983b0816e34f4ab1fadbd1e3ba9fe6640db33","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a7753dfcd495dcf08e2e3038aae983b0816e34f4ab1fadbd1e3ba9fe6640db33","first_computed_at":"2026-05-17T23:38:53.534898Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:53.534898Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yT6ImFDoouJrc2mgCZ2/nfIBMtYCfhCfOUECi6le5WHghj5sNarndgMaB7NaHGYGzcoT6xthMEcM0vT+hIpBAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:53.535523Z","signed_message":"canonical_sha256_bytes"},"source_id":"2211.13221","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c74a87d1b527b6dfc072c4ca5ea628849817f812f12c3e2a32aa39306055f223","sha256:da3660c75f9bf0099d65a6e506095f4fa3830da43ac4a4a38d80dc428a8e5564"],"state_sha256":"34d83948414539134f3df96875ed4e983b3819cb1a5acb70d0852af3c51d5f6d"}