{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:LTC5F6X7XFS5JRME2HUOBX4ZUS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1292eeb7b544d00e497f524c966f0595d82d9986f7c40778ae387cc1bf65999d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:57:04Z","title_canon_sha256":"2c7c560f4cbfdf87ccdb24d7102a49e1f8018124aba127510f276ea072866958"},"schema_version":"1.0","source":{"id":"2503.07598","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2503.07598","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"arxiv_version","alias_value":"2503.07598v2","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.07598","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"pith_short_12","alias_value":"LTC5F6X7XFS5","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LTC5F6X7XFS5JRME","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LTC5F6X7","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:73da13fc20acdc8ab94781286701f86c2370fafbb248b0bf03c53766910073bb","target":"graph","created_at":"2026-05-17T23:38:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"the unified model of VACE achieves performance on par with task-specific models across various subtasks. Simultaneously, it enables diverse applications through versatile task combinations."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"that the Video Condition Unit and Context Adapter can integrate the requirements of reference-to-video, video-to-video, and masked editing tasks into a single model without performance degradation relative to specialized systems."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"VACE unifies reference-to-video generation, video-to-video editing, and masked video-to-video editing in one Diffusion Transformer framework using a Video Condition Unit for inputs and a Context Adapter for task injection."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"VACE unifies reference-to-video generation, video-to-video editing, and masked editing in one diffusion transformer model."}],"snapshot_sha256":"5e3b2b50af3cebcfa67cc8e46fd7a55ede1bb1bbd0e72032f321f1675e98ff6b"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"98ac4e276ab47b3b674121c6770f7e8103a6978261a6811e24065b771702275b"},"paper":{"abstract_excerpt":"Diffusion Transformer has demonstrated powerful capability and scalability in generating high-quality images and videos. Further pursuing the unification of generation and editing tasks has yielded significant progress in the domain of image content creation. However, due to the intrinsic demands for consistency across both temporal and spatial dynamics, achieving a unified approach for video synthesis remains challenging. We introduce VACE, which enables users to perform Video tasks within an All-in-one framework for Creation and Editing. These tasks include reference-to-video generation, vid","authors_text":"Chaojie Mao, Jingfeng Zhang, Yulin Pan, Yu Liu, Zeyinzi Jiang, Zhen Han","cross_cats":[],"headline":"VACE unifies reference-to-video generation, video-to-video editing, and masked editing in one diffusion transformer model.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:57:04Z","title":"VACE: All-in-One Video Creation and Editing"},"references":{"count":81,"internal_anchors":6,"resolved_work":81,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"KLING AI, https://klingai.com/ ,","work_id":"02be1107-121c-4dda-a749-df32de2a34b2","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Stable Diffusion v1.5 Model Card, https://huggingface.co/runwayml/stable- diffusion-v1-5, 2022","work_id":"c7f856b2-cfe1-4a71-9cd5-529683c6a523","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Stable Diffusion Inpainting Model Card, https://huggingface.co/runwayml/stable- diffusion-inpainting, 2022","work_id":"a4296892-521a-43db-8142-7df676cf710e","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Tim Brooks, Aleksander Holynski, and Alexei A. Efros. In- structPix2Pix: Learning To Follow Image Editing Instruc- tions. In IEEE Conf. Comput. Vis. Pattern Recog. , pages 18392–18402, 2023. 2, 3","work_id":"57163bb1-5d6a-4e2b-890e-3645454dcb28","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"OpenPose: Realtime Multi-Person 2D Pose Estimation Using Part Affinity Fields","work_id":"61b0f343-0d3d-45fe-bcff-672c64331b9c","year":2021}],"snapshot_sha256":"190b06ccb06772df870e978cb4e06f92ed0225762e31f3078d155cf6dedf7197"},"source":{"id":"2503.07598","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T00:48:33.590926Z","id":"dcf63541-932c-414f-96b3-cf9a5d1d958f","model_set":{"reader":"grok-4.3"},"one_line_summary":"VACE unifies reference-to-video generation, video-to-video editing, and masked video-to-video editing in one Diffusion Transformer framework using a Video Condition Unit for inputs and a Context Adapter for task injection.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"VACE unifies reference-to-video generation, video-to-video editing, and masked editing in one diffusion transformer model.","strongest_claim":"the unified model of VACE achieves performance on par with task-specific models across various subtasks. Simultaneously, it enables diverse applications through versatile task combinations.","weakest_assumption":"that the Video Condition Unit and Context Adapter can integrate the requirements of reference-to-video, video-to-video, and masked editing tasks into a single model without performance degradation relative to specialized systems."}},"verdict_id":"dcf63541-932c-414f-96b3-cf9a5d1d958f"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a044efb69c5e46895505e4ec866e149ef80abf4ed7654bd0f5da7343d9beee28","target":"record","created_at":"2026-05-17T23:38:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1292eeb7b544d00e497f524c966f0595d82d9986f7c40778ae387cc1bf65999d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:57:04Z","title_canon_sha256":"2c7c560f4cbfdf87ccdb24d7102a49e1f8018124aba127510f276ea072866958"},"schema_version":"1.0","source":{"id":"2503.07598","kind":"arxiv","version":2}},"canonical_sha256":"5cc5d2faffb965d4c584d1e8e0df99a4a10d66fda744db74f2cbbc260888bda5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5cc5d2faffb965d4c584d1e8e0df99a4a10d66fda744db74f2cbbc260888bda5","first_computed_at":"2026-05-17T23:38:49.529939Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:49.529939Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"CidutFwk9E8Gw6ecMtZ1NStuTcwt8QSyfkCUArqrgLZ3mYisH2TZfiys0FiZgQ6uHXNPxsATYaC8HDMLr5HtCQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:49.530496Z","signed_message":"canonical_sha256_bytes"},"source_id":"2503.07598","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a044efb69c5e46895505e4ec866e149ef80abf4ed7654bd0f5da7343d9beee28","sha256:73da13fc20acdc8ab94781286701f86c2370fafbb248b0bf03c53766910073bb"],"state_sha256":"d7f0f4ca875481f342ef05d7122da72a8fc427971daa7d5e94e63b0dffa7ae89"}