{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:LJJFWT474TT3TFHKS5YCQNSXOX","short_pith_number":"pith:LJJFWT47","canonical_record":{"source":{"id":"2510.02283","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-02T17:55:42Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a0b6f0aa34f4263b2dac7cc1fe967c50048cdbe39ec9d2d0743700e505f43d47","abstract_canon_sha256":"17ea339a40f1eab15b7d0eb4d0daa64de4838ce80bff720fb091434a659311e7"},"schema_version":"1.0"},"canonical_sha256":"5a525b4f9fe4e7b994ea977028365775f96458ee822d0503435ba3c6bcfd0005","source":{"kind":"arxiv","id":"2510.02283","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.02283","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"arxiv_version","alias_value":"2510.02283v1","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.02283","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"pith_short_12","alias_value":"LJJFWT474TT3","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LJJFWT474TT3TFHK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LJJFWT47","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:LJJFWT474TT3TFHKS5YCQNSXOX","target":"record","payload":{"canonical_record":{"source":{"id":"2510.02283","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-02T17:55:42Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a0b6f0aa34f4263b2dac7cc1fe967c50048cdbe39ec9d2d0743700e505f43d47","abstract_canon_sha256":"17ea339a40f1eab15b7d0eb4d0daa64de4838ce80bff720fb091434a659311e7"},"schema_version":"1.0"},"canonical_sha256":"5a525b4f9fe4e7b994ea977028365775f96458ee822d0503435ba3c6bcfd0005","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:49.904351Z","signature_b64":"N+L7YIsOlSCaDqNTU70lU2TnRxi+dwqvTuncyim3rCdiJZnphUUGB0xEupNV4E5xZqDOeXCH9m5f3gEvPVENBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5a525b4f9fe4e7b994ea977028365775f96458ee822d0503435ba3c6bcfd0005","last_reissued_at":"2026-05-17T23:38:49.903873Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:49.903873Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.02283","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oTSCT+UJlt1pt9WGxLkaTc2ul7UQ5flIxiyuO6Tx7WOjTwfoRSPhF0/r8co5b+Gz6nXCzyRHj0q8gX50Pg5TBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T07:50:04.649208Z"},"content_sha256":"c31718b1d10268a3d739a03c21cc48e9b20d27e2cf866618abaa6bdca51404f3","schema_version":"1.0","event_id":"sha256:c31718b1d10268a3d739a03c21cc48e9b20d27e2cf866618abaa6bdca51404f3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:LJJFWT474TT3TFHKS5YCQNSXOX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Self-Forcing++: Towards Minute-Scale High-Quality Video Generation","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"Self-generated segments from a video model steer it to produce coherent four-minute clips without long-video teachers or retraining.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Andrew Bai, Cho-Jui Hsieh, Jie Wu, Justin Cui, Ming Li, Rui Wang, Tao Yang, Xiaojie Li, Yuanhao Ban","submitted_at":"2025-10-02T17:55:42Z","abstract_excerpt":"Diffusion models have revolutionized image and video generation, achieving unprecedented visual quality. However, their reliance on transformer architectures incurs prohibitively high computational costs, particularly when extending generation to long videos. Recent work has explored autoregressive formulations for long video generation, typically by distilling from short-horizon bidirectional teachers. Nevertheless, given that teacher models cannot synthesize long videos, the extrapolation of student models beyond their training horizon often leads to pronounced quality degradation, arising f"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"our method shows the capability of generating videos up to 4 minutes and 15 seconds, equivalent to 99.9% of the maximum span supported by our base model's position embedding and more than 50x longer than that of our baseline model","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That segments sampled from the model's own long self-generated videos supply reliable, non-degrading guidance equivalent to teacher supervision without introducing new compounding errors in the latent space.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Self-Forcing++ scales autoregressive video diffusion to over 4 minutes by using self-generated segments for guidance, reducing error accumulation and outperforming baselines in fidelity and consistency.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Self-generated segments from a video model steer it to produce coherent four-minute clips without long-video teachers or retraining.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d9ac5ef6d5fef45a7e6ee6ba5daa0c3c00ed3ff4baac0bd156317f7b1e7cba82"},"source":{"id":"2510.02283","kind":"arxiv","version":1},"verdict":{"id":"cea5dc07-0b47-4428-9a12-f3ef0fd9b0e8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T22:36:28.846851Z","strongest_claim":"our method shows the capability of generating videos up to 4 minutes and 15 seconds, equivalent to 99.9% of the maximum span supported by our base model's position embedding and more than 50x longer than that of our baseline model","one_line_summary":"Self-Forcing++ scales autoregressive video diffusion to over 4 minutes by using self-generated segments for guidance, reducing error accumulation and outperforming baselines in fidelity and consistency.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That segments sampled from the model's own long self-generated videos supply reliable, non-degrading guidance equivalent to teacher supervision without introducing new compounding errors in the latent space.","pith_extraction_headline":"Self-generated segments from a video model steer it to produce coherent four-minute clips without long-video teachers or retraining."},"references":{"count":72,"sample":[{"doi":"","year":2024,"title":"Diffusion for world modeling: Visual details matter in atari.Advancesin Neural Information Processing Systems, 37:58757–58791, 2024","work_id":"a36c75da-338f-4d9d-8e10-975975cff124","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets","work_id":"4f68eada-27e3-437a-a2fe-6e4ca524d0d3","ref_index":2,"cited_arxiv_id":"2311.15127","is_internal_anchor":true},{"doi":"","year":2024,"title":"Genie: Generative interactive environments","work_id":"6c633c28-756b-4f8a-b31e-d5ac37197f04","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Videojam: Joint appearance-motion representations for en- hanced motion generation in video models","work_id":"d22ef704-e6df-4caf-a9b3-f220ad768f8b","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Diffusion forcing: Next-token prediction meets full-sequence diffusion.Advancesin Neural Information Processing Systems, 37:24081–24125, 2024","work_id":"9d20d335-5d78-4362-b7ce-11f7495019af","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":72,"snapshot_sha256":"9ad9ca0662156e44d7f1bb4d45e592642936163101b102f2c85bde14aa61bd85","internal_anchors":28},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"cea5dc07-0b47-4428-9a12-f3ef0fd9b0e8"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C3kz1l1pk+7pmqOSkzTshiVJCohBjSrPq/l6nXxm06QTCsmPQRYPAIVuWkkL2i642k7fHJv5+43TKNT2XX3xBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T07:50:04.649831Z"},"content_sha256":"2f2b92794ba9c9d3ca61b76c4c9d3c0fa7e0943fc7d2a660686e21466a707ad0","schema_version":"1.0","event_id":"sha256:2f2b92794ba9c9d3ca61b76c4c9d3c0fa7e0943fc7d2a660686e21466a707ad0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LJJFWT474TT3TFHKS5YCQNSXOX/bundle.json","state_url":"https://pith.science/pith/LJJFWT474TT3TFHKS5YCQNSXOX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LJJFWT474TT3TFHKS5YCQNSXOX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T07:50:04Z","links":{"resolver":"https://pith.science/pith/LJJFWT474TT3TFHKS5YCQNSXOX","bundle":"https://pith.science/pith/LJJFWT474TT3TFHKS5YCQNSXOX/bundle.json","state":"https://pith.science/pith/LJJFWT474TT3TFHKS5YCQNSXOX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LJJFWT474TT3TFHKS5YCQNSXOX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:LJJFWT474TT3TFHKS5YCQNSXOX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"17ea339a40f1eab15b7d0eb4d0daa64de4838ce80bff720fb091434a659311e7","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-02T17:55:42Z","title_canon_sha256":"a0b6f0aa34f4263b2dac7cc1fe967c50048cdbe39ec9d2d0743700e505f43d47"},"schema_version":"1.0","source":{"id":"2510.02283","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.02283","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"arxiv_version","alias_value":"2510.02283v1","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.02283","created_at":"2026-05-17T23:38:49Z"},{"alias_kind":"pith_short_12","alias_value":"LJJFWT474TT3","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LJJFWT474TT3TFHK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LJJFWT47","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:2f2b92794ba9c9d3ca61b76c4c9d3c0fa7e0943fc7d2a660686e21466a707ad0","target":"graph","created_at":"2026-05-17T23:38:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"our method shows the capability of generating videos up to 4 minutes and 15 seconds, equivalent to 99.9% of the maximum span supported by our base model's position embedding and more than 50x longer than that of our baseline model"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That segments sampled from the model's own long self-generated videos supply reliable, non-degrading guidance equivalent to teacher supervision without introducing new compounding errors in the latent space."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Self-Forcing++ scales autoregressive video diffusion to over 4 minutes by using self-generated segments for guidance, reducing error accumulation and outperforming baselines in fidelity and consistency."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Self-generated segments from a video model steer it to produce coherent four-minute clips without long-video teachers or retraining."}],"snapshot_sha256":"d9ac5ef6d5fef45a7e6ee6ba5daa0c3c00ed3ff4baac0bd156317f7b1e7cba82"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Diffusion models have revolutionized image and video generation, achieving unprecedented visual quality. However, their reliance on transformer architectures incurs prohibitively high computational costs, particularly when extending generation to long videos. Recent work has explored autoregressive formulations for long video generation, typically by distilling from short-horizon bidirectional teachers. Nevertheless, given that teacher models cannot synthesize long videos, the extrapolation of student models beyond their training horizon often leads to pronounced quality degradation, arising f","authors_text":"Andrew Bai, Cho-Jui Hsieh, Jie Wu, Justin Cui, Ming Li, Rui Wang, Tao Yang, Xiaojie Li, Yuanhao Ban","cross_cats":["cs.AI"],"headline":"Self-generated segments from a video model steer it to produce coherent four-minute clips without long-video teachers or retraining.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-02T17:55:42Z","title":"Self-Forcing++: Towards Minute-Scale High-Quality Video Generation"},"references":{"count":72,"internal_anchors":28,"resolved_work":72,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Diffusion for world modeling: Visual details matter in atari.Advancesin Neural Information Processing Systems, 37:58757–58791, 2024","work_id":"a36c75da-338f-4d9d-8e10-975975cff124","year":2024},{"cited_arxiv_id":"2311.15127","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets","work_id":"4f68eada-27e3-437a-a2fe-6e4ca524d0d3","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Genie: Generative interactive environments","work_id":"6c633c28-756b-4f8a-b31e-d5ac37197f04","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Videojam: Joint appearance-motion representations for en- hanced motion generation in video models","work_id":"d22ef704-e6df-4caf-a9b3-f220ad768f8b","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Diffusion forcing: Next-token prediction meets full-sequence diffusion.Advancesin Neural Information Processing Systems, 37:24081–24125, 2024","work_id":"9d20d335-5d78-4362-b7ce-11f7495019af","year":2024}],"snapshot_sha256":"9ad9ca0662156e44d7f1bb4d45e592642936163101b102f2c85bde14aa61bd85"},"source":{"id":"2510.02283","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T22:36:28.846851Z","id":"cea5dc07-0b47-4428-9a12-f3ef0fd9b0e8","model_set":{"reader":"grok-4.3"},"one_line_summary":"Self-Forcing++ scales autoregressive video diffusion to over 4 minutes by using self-generated segments for guidance, reducing error accumulation and outperforming baselines in fidelity and consistency.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Self-generated segments from a video model steer it to produce coherent four-minute clips without long-video teachers or retraining.","strongest_claim":"our method shows the capability of generating videos up to 4 minutes and 15 seconds, equivalent to 99.9% of the maximum span supported by our base model's position embedding and more than 50x longer than that of our baseline model","weakest_assumption":"That segments sampled from the model's own long self-generated videos supply reliable, non-degrading guidance equivalent to teacher supervision without introducing new compounding errors in the latent space."}},"verdict_id":"cea5dc07-0b47-4428-9a12-f3ef0fd9b0e8"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c31718b1d10268a3d739a03c21cc48e9b20d27e2cf866618abaa6bdca51404f3","target":"record","created_at":"2026-05-17T23:38:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"17ea339a40f1eab15b7d0eb4d0daa64de4838ce80bff720fb091434a659311e7","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-02T17:55:42Z","title_canon_sha256":"a0b6f0aa34f4263b2dac7cc1fe967c50048cdbe39ec9d2d0743700e505f43d47"},"schema_version":"1.0","source":{"id":"2510.02283","kind":"arxiv","version":1}},"canonical_sha256":"5a525b4f9fe4e7b994ea977028365775f96458ee822d0503435ba3c6bcfd0005","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5a525b4f9fe4e7b994ea977028365775f96458ee822d0503435ba3c6bcfd0005","first_computed_at":"2026-05-17T23:38:49.903873Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:49.903873Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"N+L7YIsOlSCaDqNTU70lU2TnRxi+dwqvTuncyim3rCdiJZnphUUGB0xEupNV4E5xZqDOeXCH9m5f3gEvPVENBg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:49.904351Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.02283","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c31718b1d10268a3d739a03c21cc48e9b20d27e2cf866618abaa6bdca51404f3","sha256:2f2b92794ba9c9d3ca61b76c4c9d3c0fa7e0943fc7d2a660686e21466a707ad0"],"state_sha256":"0328798977eff69a10910ad1fb83f4dc47965c5c4aa61be2cbc3fdaa150faabb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cCxm1zXgOx0BxCbPGFkjMPvzFS4YVcARQ8ny2vg8tnrXuYZ5+InUthKkIssjzTEjpm/Vgni68J6cIHGYcau/AQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T07:50:04.652369Z","bundle_sha256":"84b09241a4287d5f84acda734b0ddc0984b1f596d4639c5f26e0f5295f5821d2"}}