{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZS7YHV76TZS6UHVYD7VM2PWQ2J","short_pith_number":"pith:ZS7YHV76","schema_version":"1.0","canonical_sha256":"ccbf83d7fe9e65ea1eb81feacd3ed0d2658abeb1a85bbd67c701d6abf61ac7ee","source":{"kind":"arxiv","id":"2606.11180","version":1},"attestation_state":"computed","paper":{"title":"Lip Forcing: Few-Step Autoregressive Diffusion for Real-time Lip Synchronization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"(2) AIPARK), Chulmin Park (2), Heeseong Shin (1), Jinhyuk Jang (1), Joungbin Lee (1), Jung Yi (1), Paul Hyunbin Cho (1), SeokYoung Lee (1), Seungryong Kim (1) ((1) KAIST AI, Siyoon Jin (1), Yunjin Park (2)","submitted_at":"2026-06-09T17:56:36Z","abstract_excerpt":"Diffusion-based lip synchronization models achieve strong visual quality and audio-visual alignment, but full-sequence bidirectional attention and many denoising steps make them impractical for real-time inference. We present Lip Forcing, to our knowledge the first autoregressive diffusion method for video-to-video (V2V) lip synchronization, which distills a 14B audio-conditioned bidirectional video diffusion teacher into causal students. At inference, the students generate each chunk in only two denoising steps without inference-time CFG, enabling real-time lip synchronization. A lip-sync-spe"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.11180","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-09T17:56:36Z","cross_cats_sorted":[],"title_canon_sha256":"499179d239a18a72d6bc9d63c1dcd6c8fc2e171b5cec25393c2e04b2d39e8cf8","abstract_canon_sha256":"f042dc5e1da020faa309750031f1a2bbbf38a79fd1f0830465edf2cdd9b8b4f3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:11:15.583842Z","signature_b64":"F4How+hLoddykMIVtUVcfpmc0P3jAzpABixelnMva+H5B2aycm5Dq7+BQ4wRIaxYiuUGjTvuNGCEq5X8nvOhDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ccbf83d7fe9e65ea1eb81feacd3ed0d2658abeb1a85bbd67c701d6abf61ac7ee","last_reissued_at":"2026-06-10T01:11:15.583058Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:11:15.583058Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Lip Forcing: Few-Step Autoregressive Diffusion for Real-time Lip Synchronization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"(2) AIPARK), Chulmin Park (2), Heeseong Shin (1), Jinhyuk Jang (1), Joungbin Lee (1), Jung Yi (1), Paul Hyunbin Cho (1), SeokYoung Lee (1), Seungryong Kim (1) ((1) KAIST AI, Siyoon Jin (1), Yunjin Park (2)","submitted_at":"2026-06-09T17:56:36Z","abstract_excerpt":"Diffusion-based lip synchronization models achieve strong visual quality and audio-visual alignment, but full-sequence bidirectional attention and many denoising steps make them impractical for real-time inference. We present Lip Forcing, to our knowledge the first autoregressive diffusion method for video-to-video (V2V) lip synchronization, which distills a 14B audio-conditioned bidirectional video diffusion teacher into causal students. At inference, the students generate each chunk in only two denoising steps without inference-time CFG, enabling real-time lip synchronization. A lip-sync-spe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11180","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.11180/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.11180","created_at":"2026-06-10T01:11:15.583183+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.11180v1","created_at":"2026-06-10T01:11:15.583183+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11180","created_at":"2026-06-10T01:11:15.583183+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZS7YHV76TZS6","created_at":"2026-06-10T01:11:15.583183+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZS7YHV76TZS6UHVY","created_at":"2026-06-10T01:11:15.583183+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZS7YHV76","created_at":"2026-06-10T01:11:15.583183+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J","json":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J.json","graph_json":"https://pith.science/api/pith-number/ZS7YHV76TZS6UHVYD7VM2PWQ2J/graph.json","events_json":"https://pith.science/api/pith-number/ZS7YHV76TZS6UHVYD7VM2PWQ2J/events.json","paper":"https://pith.science/paper/ZS7YHV76"},"agent_actions":{"view_html":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J","download_json":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J.json","view_paper":"https://pith.science/paper/ZS7YHV76","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.11180&json=true","fetch_graph":"https://pith.science/api/pith-number/ZS7YHV76TZS6UHVYD7VM2PWQ2J/graph.json","fetch_events":"https://pith.science/api/pith-number/ZS7YHV76TZS6UHVYD7VM2PWQ2J/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J/action/storage_attestation","attest_author":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J/action/author_attestation","sign_citation":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J/action/citation_signature","submit_replication":"https://pith.science/pith/ZS7YHV76TZS6UHVYD7VM2PWQ2J/action/replication_record"}},"created_at":"2026-06-10T01:11:15.583183+00:00","updated_at":"2026-06-10T01:11:15.583183+00:00"}