{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:QUBC5UMTDPBC4QFEYWLPFN5E77","short_pith_number":"pith:QUBC5UMT","canonical_record":{"source":{"id":"2406.02509","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2024-06-04T17:27:19Z","cross_cats_sorted":[],"title_canon_sha256":"c358ab54f299c894e2ff6b254dd0f9df8d91edd137583060629b740233c5d527","abstract_canon_sha256":"271e94179800efbd83c4c50a7553d59c8d5f0b355ff9be3032dc979f02d0c972"},"schema_version":"1.0"},"canonical_sha256":"85022ed1931bc22e40a4c596f2b7a4ffccf4e8ac3491af192700fa19049f38e4","source":{"kind":"arxiv","id":"2406.02509","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.02509","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2406.02509v1","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.02509","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"QUBC5UMTDPBC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QUBC5UMTDPBC4QFE","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QUBC5UMT","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:QUBC5UMTDPBC4QFEYWLPFN5E77","target":"record","payload":{"canonical_record":{"source":{"id":"2406.02509","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2024-06-04T17:27:19Z","cross_cats_sorted":[],"title_canon_sha256":"c358ab54f299c894e2ff6b254dd0f9df8d91edd137583060629b740233c5d527","abstract_canon_sha256":"271e94179800efbd83c4c50a7553d59c8d5f0b355ff9be3032dc979f02d0c972"},"schema_version":"1.0"},"canonical_sha256":"85022ed1931bc22e40a4c596f2b7a4ffccf4e8ac3491af192700fa19049f38e4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.805598Z","signature_b64":"kfxfivGlFQcPvaqoQyt6aEegjVeOuP+Paj5Up9zdFPNykFc21XVjLR9t8gcSd8mojbV0LoabN5uAoeuptHMCDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"85022ed1931bc22e40a4c596f2b7a4ffccf4e8ac3491af192700fa19049f38e4","last_reissued_at":"2026-05-17T23:38:46.805055Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.805055Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2406.02509","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SGxMIMFIhEvenc1xwK+j27AoutFzjWhbS7PiPY6RD1dxSXhsEuwKudQD+zooUEC3mLvdsxx9z/14oIMZ9MA6Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T09:38:45.088946Z"},"content_sha256":"12c6d94b54810955f70f8410bc5c746d55f0ec7693dbd081dc7968e6aa01e004","schema_version":"1.0","event_id":"sha256:12c6d94b54810955f70f8410bc5c746d55f0ec7693dbd081dc7968e6aa01e004"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:QUBC5UMTDPBC4QFEYWLPFN5E77","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"CamCo: Camera-Controllable 3D-Consistent Image-to-Video Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"CamCo adds precise camera pose control to image-to-video generation while enforcing 3D consistency across frames.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Arash Vahdat, Chao Liu, Dejia Xu, Jan Kautz, Sifei Liu, Weili Nie, Zhangyang Wang","submitted_at":"2024-06-04T17:27:19Z","abstract_excerpt":"Recently video diffusion models have emerged as expressive generative tools for high-quality video content creation readily available to general users. However, these models often do not offer precise control over camera poses for video generation, limiting the expression of cinematic language and user control. To address this issue, we introduce CamCo, which allows fine-grained Camera pose Control for image-to-video generation. We equip a pre-trained image-to-video generator with accurately parameterized camera pose input using Pl\\\"ucker coordinates. To enhance 3D consistency in the videos pr"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our experiments show that CamCo significantly improves 3D consistency and camera control capabilities compared to previous models while effectively generating plausible object motion.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the epipolar attention module will enforce geometric consistency across frames without introducing new artifacts or reducing visual quality, and that fine-tuning on SfM-estimated poses from real videos will transfer to user-specified camera trajectories at inference time.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"CamCo equips image-to-video generators with Plücker-coordinate camera inputs and epipolar attention to improve 3D consistency and camera controllability.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"CamCo adds precise camera pose control to image-to-video generation while enforcing 3D consistency across frames.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"9e355c76cda6241c7eb682f09765b59b0c155b6c8255ac13465cf99d76d33f7e"},"source":{"id":"2406.02509","kind":"arxiv","version":1},"verdict":{"id":"6c9be686-5027-41cb-9fcf-9c3222dad798","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T19:39:57.771538Z","strongest_claim":"Our experiments show that CamCo significantly improves 3D consistency and camera control capabilities compared to previous models while effectively generating plausible object motion.","one_line_summary":"CamCo equips image-to-video generators with Plücker-coordinate camera inputs and epipolar attention to improve 3D consistency and camera controllability.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the epipolar attention module will enforce geometric consistency across frames without introducing new artifacts or reducing visual quality, and that fine-tuning on SfM-estimated poses from real videos will transfer to user-specified camera trajectories at inference time.","pith_extraction_headline":"CamCo adds precise camera pose control to image-to-video generation while enforcing 3D consistency across frames."},"references":{"count":63,"sample":[{"doi":"","year":2023,"title":"Stable video diffusion: Scaling latent video diffu- sion models to large datasets","work_id":"44aec313-7388-4097-b049-fb3888e3f072","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Frozen in time: A joint video and image encoder for end-to-end retrieval","work_id":"2a136f10-92cd-4a8d-96ba-7aa9ab74f8d3","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Improving image generation with better captions","work_id":"aa4e9e1a-4c37-468d-bdb4-412819771b5e","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Align your latents: High-resolution video synthesis with latent diffusion models","work_id":"d4d6346d-ffdc-4dd2-a00c-f7409d76344d","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Video generation models as world simulators","work_id":"d274a5e4-174c-445f-b42d-b4f55cdfb2f5","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":63,"snapshot_sha256":"1a4235ee2b6c90abb42075b49951c16dd440363dccd01fbec15dab03f20b7d68","internal_anchors":17},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d7c36e7bd89d193282ea8b5bbc01bf4e3e69bdca6f4006b6d4a122be458b3fb2"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"6c9be686-5027-41cb-9fcf-9c3222dad798"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FIhQ3SY6CcQnH0rvQwJrkKi3ZXwmZ8ao8r/IF4aLQbOELQ1N3fntjKPqiaVR3w4YS3HGVKCZP8jDXUuby/vICg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T09:38:45.090093Z"},"content_sha256":"173884ef62863b62d0da8a51c871ad4596c21f0386d22bec8b1308604d9bba06","schema_version":"1.0","event_id":"sha256:173884ef62863b62d0da8a51c871ad4596c21f0386d22bec8b1308604d9bba06"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QUBC5UMTDPBC4QFEYWLPFN5E77/bundle.json","state_url":"https://pith.science/pith/QUBC5UMTDPBC4QFEYWLPFN5E77/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QUBC5UMTDPBC4QFEYWLPFN5E77/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T09:38:45Z","links":{"resolver":"https://pith.science/pith/QUBC5UMTDPBC4QFEYWLPFN5E77","bundle":"https://pith.science/pith/QUBC5UMTDPBC4QFEYWLPFN5E77/bundle.json","state":"https://pith.science/pith/QUBC5UMTDPBC4QFEYWLPFN5E77/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QUBC5UMTDPBC4QFEYWLPFN5E77/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:QUBC5UMTDPBC4QFEYWLPFN5E77","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"271e94179800efbd83c4c50a7553d59c8d5f0b355ff9be3032dc979f02d0c972","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2024-06-04T17:27:19Z","title_canon_sha256":"c358ab54f299c894e2ff6b254dd0f9df8d91edd137583060629b740233c5d527"},"schema_version":"1.0","source":{"id":"2406.02509","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.02509","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2406.02509v1","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.02509","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"QUBC5UMTDPBC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QUBC5UMTDPBC4QFE","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QUBC5UMT","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:173884ef62863b62d0da8a51c871ad4596c21f0386d22bec8b1308604d9bba06","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our experiments show that CamCo significantly improves 3D consistency and camera control capabilities compared to previous models while effectively generating plausible object motion."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the epipolar attention module will enforce geometric consistency across frames without introducing new artifacts or reducing visual quality, and that fine-tuning on SfM-estimated poses from real videos will transfer to user-specified camera trajectories at inference time."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"CamCo equips image-to-video generators with Plücker-coordinate camera inputs and epipolar attention to improve 3D consistency and camera controllability."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"CamCo adds precise camera pose control to image-to-video generation while enforcing 3D consistency across frames."}],"snapshot_sha256":"9e355c76cda6241c7eb682f09765b59b0c155b6c8255ac13465cf99d76d33f7e"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d7c36e7bd89d193282ea8b5bbc01bf4e3e69bdca6f4006b6d4a122be458b3fb2"},"paper":{"abstract_excerpt":"Recently video diffusion models have emerged as expressive generative tools for high-quality video content creation readily available to general users. However, these models often do not offer precise control over camera poses for video generation, limiting the expression of cinematic language and user control. To address this issue, we introduce CamCo, which allows fine-grained Camera pose Control for image-to-video generation. We equip a pre-trained image-to-video generator with accurately parameterized camera pose input using Pl\\\"ucker coordinates. To enhance 3D consistency in the videos pr","authors_text":"Arash Vahdat, Chao Liu, Dejia Xu, Jan Kautz, Sifei Liu, Weili Nie, Zhangyang Wang","cross_cats":[],"headline":"CamCo adds precise camera pose control to image-to-video generation while enforcing 3D consistency across frames.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2024-06-04T17:27:19Z","title":"CamCo: Camera-Controllable 3D-Consistent Image-to-Video Generation"},"references":{"count":63,"internal_anchors":17,"resolved_work":63,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Stable video diffusion: Scaling latent video diffu- sion models to large datasets","work_id":"44aec313-7388-4097-b049-fb3888e3f072","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Frozen in time: A joint video and image encoder for end-to-end retrieval","work_id":"2a136f10-92cd-4a8d-96ba-7aa9ab74f8d3","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Improving image generation with better captions","work_id":"aa4e9e1a-4c37-468d-bdb4-412819771b5e","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Align your latents: High-resolution video synthesis with latent diffusion models","work_id":"d4d6346d-ffdc-4dd2-a00c-f7409d76344d","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Video generation models as world simulators","work_id":"d274a5e4-174c-445f-b42d-b4f55cdfb2f5","year":2024}],"snapshot_sha256":"1a4235ee2b6c90abb42075b49951c16dd440363dccd01fbec15dab03f20b7d68"},"source":{"id":"2406.02509","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-16T19:39:57.771538Z","id":"6c9be686-5027-41cb-9fcf-9c3222dad798","model_set":{"reader":"grok-4.3"},"one_line_summary":"CamCo equips image-to-video generators with Plücker-coordinate camera inputs and epipolar attention to improve 3D consistency and camera controllability.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"CamCo adds precise camera pose control to image-to-video generation while enforcing 3D consistency across frames.","strongest_claim":"Our experiments show that CamCo significantly improves 3D consistency and camera control capabilities compared to previous models while effectively generating plausible object motion.","weakest_assumption":"That the epipolar attention module will enforce geometric consistency across frames without introducing new artifacts or reducing visual quality, and that fine-tuning on SfM-estimated poses from real videos will transfer to user-specified camera trajectories at inference time."}},"verdict_id":"6c9be686-5027-41cb-9fcf-9c3222dad798"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:12c6d94b54810955f70f8410bc5c746d55f0ec7693dbd081dc7968e6aa01e004","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"271e94179800efbd83c4c50a7553d59c8d5f0b355ff9be3032dc979f02d0c972","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2024-06-04T17:27:19Z","title_canon_sha256":"c358ab54f299c894e2ff6b254dd0f9df8d91edd137583060629b740233c5d527"},"schema_version":"1.0","source":{"id":"2406.02509","kind":"arxiv","version":1}},"canonical_sha256":"85022ed1931bc22e40a4c596f2b7a4ffccf4e8ac3491af192700fa19049f38e4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"85022ed1931bc22e40a4c596f2b7a4ffccf4e8ac3491af192700fa19049f38e4","first_computed_at":"2026-05-17T23:38:46.805055Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.805055Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kfxfivGlFQcPvaqoQyt6aEegjVeOuP+Paj5Up9zdFPNykFc21XVjLR9t8gcSd8mojbV0LoabN5uAoeuptHMCDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.805598Z","signed_message":"canonical_sha256_bytes"},"source_id":"2406.02509","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:12c6d94b54810955f70f8410bc5c746d55f0ec7693dbd081dc7968e6aa01e004","sha256:173884ef62863b62d0da8a51c871ad4596c21f0386d22bec8b1308604d9bba06"],"state_sha256":"e3be290853df0a4462a7147f1f49c798320d34cac3905a6e6769c6df19a9251b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4bunxyqMyc6aKgKTTUARfisrBUEr2Op4lXWSPe6RpCEQBL60iWfqGoeDFebMDkqM2mOOuz5dlLH8MAo60njJBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T09:38:45.095028Z","bundle_sha256":"8243c5f504ec429754a3872cbd45cbc3ed003d59dee6ea2754fb942f512161f1"}}