{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:Z3SH3GDBVKBL4C5BHZ2THBUHEM","short_pith_number":"pith:Z3SH3GDB","schema_version":"1.0","canonical_sha256":"cee47d9861aa82be0ba13e7533868723279b108f804c8eb8e2e98fc581828d2f","source":{"kind":"arxiv","id":"2410.12557","version":3},"attestation_state":"computed","paper":{"title":"One Step Diffusion via Shortcut Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Shortcut models generate high-quality diffusion samples in one step using a single network.","cross_cats":["cs.CV"],"primary_cat":"cs.LG","authors_text":"Danijar Hafner, Kevin Frans, Pieter Abbeel, Sergey Levine","submitted_at":"2024-10-16T13:34:40Z","abstract_excerpt":"Diffusion models and flow-matching models have enabled generating diverse and realistic images by learning to transfer noise to data. However, sampling from these models involves iterative denoising over many neural network passes, making generation slow and expensive. Previous approaches for speeding up sampling require complex training regimes, such as multiple training phases, multiple networks, or fragile scheduling. We introduce shortcut models, a family of generative models that use a single network and training phase to produce high-quality samples in a single or multiple sampling steps"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2410.12557","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2024-10-16T13:34:40Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"3fc05110424b39f5ffe0b26a1dc0e3e541fa5acf9c2d6d3a031b40b293d7abc5","abstract_canon_sha256":"5d0dbb8d6952cd56a5ec73a36c02b8cb6fd2a48bd9091654cd6e00a68e10c8b9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:53.234408Z","signature_b64":"jzWOSgfa944jpXlCNh1E8e+sP0wOC1YFgAeYWDPdLI1ftpp4XJHVn5Gj0zpxzafFOKpbA3STs5aNNUe3bASHCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cee47d9861aa82be0ba13e7533868723279b108f804c8eb8e2e98fc581828d2f","last_reissued_at":"2026-05-17T23:38:53.233683Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:53.233683Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"One Step Diffusion via Shortcut Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Shortcut models generate high-quality diffusion samples in one step using a single network.","cross_cats":["cs.CV"],"primary_cat":"cs.LG","authors_text":"Danijar Hafner, Kevin Frans, Pieter Abbeel, Sergey Levine","submitted_at":"2024-10-16T13:34:40Z","abstract_excerpt":"Diffusion models and flow-matching models have enabled generating diverse and realistic images by learning to transfer noise to data. However, sampling from these models involves iterative denoising over many neural network passes, making generation slow and expensive. Previous approaches for speeding up sampling require complex training regimes, such as multiple training phases, multiple networks, or fragile scheduling. We introduce shortcut models, a family of generative models that use a single network and training phase to produce high-quality samples in a single or multiple sampling steps"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Shortcut models consistently produce higher quality samples than previous approaches, such as consistency models and reflow. Compared to distillation, shortcut models reduce complexity to a single network and training phase and additionally allow varying step budgets at inference time.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That a single network can learn effective large-step transitions across a wide range of step sizes during one training phase without quality degradation or the need for fragile scheduling.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Shortcut models enable high-quality single or few-step sampling in diffusion models with one network and training phase by conditioning on desired step size.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Shortcut models generate high-quality diffusion samples in one step using a single network.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"18dd1c5ee98a82f54a2216d58e3b78d3de7d6fd6f1f4e210122fde23b2cc9bde"},"source":{"id":"2410.12557","kind":"arxiv","version":3},"verdict":{"id":"c649fac0-11fc-4937-bf6d-a784c5644915","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T06:36:06.295454Z","strongest_claim":"Shortcut models consistently produce higher quality samples than previous approaches, such as consistency models and reflow. Compared to distillation, shortcut models reduce complexity to a single network and training phase and additionally allow varying step budgets at inference time.","one_line_summary":"Shortcut models enable high-quality single or few-step sampling in diffusion models with one network and training phase by conditioning on desired step size.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That a single network can learn effective large-step transitions across a wide range of step sizes during one training phase without quality degradation or the need for fragile scheduling.","pith_extraction_headline":"Shortcut models generate high-quality diffusion samples in one step using a single network."},"references":{"count":28,"sample":[{"doi":"","year":null,"title":"Lumiere: A space-time diffusion model for video generation","work_id":"8a0a0735-d82c-4090-a039-697d06ccc3f0","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Tract: Denoising diffusion models with transitive closure time-distillation","work_id":"d884704a-6249-4934-a29c-436da79e969e","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"arXiv preprint arXiv:2406.07507 (2024) 5","work_id":"3b39f5ec-7294-49ef-ac0e-d95360c0f177","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Large Scale GAN Training for High Fidelity Natural Image Synthesis","work_id":"244e6f06-bad2-4f34-8186-ff370286427f","ref_index":4,"cited_arxiv_id":"1809.11096","is_internal_anchor":true},{"doi":"","year":null,"title":"Diffusion Policy: Visuomotor Policy Learning via Action Diffusion","work_id":"2dce18e6-f07a-4f57-8a81-e71c3e6a293c","ref_index":5,"cited_arxiv_id":"2303.04137","is_internal_anchor":true}],"resolved_work":28,"snapshot_sha256":"961f45c2a95c384988d788200a803c2f8ff5c59a0704cf1e7fd1bdb9f50fc1e8","internal_anchors":14},"formal_canon":{"evidence_count":3,"snapshot_sha256":"0e4e695f16497c7a72559d8dc9590b32ad87585f9f46b7f6464a3b19e7035bad"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2410.12557","created_at":"2026-05-17T23:38:53.233792+00:00"},{"alias_kind":"arxiv_version","alias_value":"2410.12557v3","created_at":"2026-05-17T23:38:53.233792+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.12557","created_at":"2026-05-17T23:38:53.233792+00:00"},{"alias_kind":"pith_short_12","alias_value":"Z3SH3GDBVKBL","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"Z3SH3GDBVKBL4C5B","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"Z3SH3GDB","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":33,"internal_anchor_count":33,"sample":[{"citing_arxiv_id":"2509.01629","citing_title":"Lipschitz-Guided Design of Interpolation Schedules in Generative Models","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2602.16813","citing_title":"Flow Map Language Models: One-step Language Modeling via Continuous Denoising","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27147","citing_title":"How to Guide Your Flow: Few-Step Alignment via Flow Map Reward Guidance","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15592","citing_title":"Efficient Image Synthesis with Sphere Latent Encoder","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17834","citing_title":"Stabilizing, Scaling & Enhancing MeanFlow for Large-scale Diffusion Distillation","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18346","citing_title":"Focused Forcing: Content-Aware Per-Frame KV Selection for Efficient Autoregressive Video Diffusion","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17546","citing_title":"Accelerating Redshift-Conditioned Galaxy Image Synthesis with One-step Generative Modeling","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2512.04677","citing_title":"Live Avatar: Streaming Real-time Audio-Driven Avatar Generation with Infinite Length","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2504.20690","citing_title":"In-Context Edit: Enabling Instructional Image Editing with In-Context Generation in Large Scale Diffusion Transformer","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"2602.05449","citing_title":"DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2602.10764","citing_title":"Dual-End Consistency Model","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2602.16813","citing_title":"Flow Map Language Models: One-step Language Modeling via Continuous Denoising","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2512.14614","citing_title":"WorldPlay: Towards Long-Term Geometric Consistency for Real-Time Interactive World Modeling","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2506.07339","citing_title":"Real-Time Execution of Action Chunking Flow Policies","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14513","citing_title":"HASTE: Training-Free Video Diffusion Acceleration via Head-Wise Adaptive Sparse Attention","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2509.24527","citing_title":"Training Agents Inside of Scalable World Models","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13778","citing_title":"Realtime-VLA FLASH: Speculative Inference Framework for Diffusion-based VLAs","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03225","citing_title":"VOSR: A Vision-Only Generative Model for Image Super-Resolution","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03118","citing_title":"Salt: Self-Consistent Distribution Matching with Cache-Aware Training for Fast Video Generation","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11755","citing_title":"One-Step Generative Modeling via Wasserstein Gradient Flows","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12183","citing_title":"DriftXpress: Faster Drifting Models via Projected RKHS Fields","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2604.28185","citing_title":"Visual Generation in the New Era: An Evolution from Atomic Mapping to Agentic World Modeling","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27147","citing_title":"How to Guide Your Flow: Few-Step Alignment via Flow Map Reward Guidance","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2604.26065","citing_title":"FlowS: One-Step Motion Prediction via Local Transport Conditioning","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2604.25819","citing_title":"Mutual Forcing: Dual-Mode Self-Evolution for Fast Autoregressive Audio-Video Character Generation","ref_index":11,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":3,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM","json":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM.json","graph_json":"https://pith.science/api/pith-number/Z3SH3GDBVKBL4C5BHZ2THBUHEM/graph.json","events_json":"https://pith.science/api/pith-number/Z3SH3GDBVKBL4C5BHZ2THBUHEM/events.json","paper":"https://pith.science/paper/Z3SH3GDB"},"agent_actions":{"view_html":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM","download_json":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM.json","view_paper":"https://pith.science/paper/Z3SH3GDB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2410.12557&json=true","fetch_graph":"https://pith.science/api/pith-number/Z3SH3GDBVKBL4C5BHZ2THBUHEM/graph.json","fetch_events":"https://pith.science/api/pith-number/Z3SH3GDBVKBL4C5BHZ2THBUHEM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM/action/storage_attestation","attest_author":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM/action/author_attestation","sign_citation":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM/action/citation_signature","submit_replication":"https://pith.science/pith/Z3SH3GDBVKBL4C5BHZ2THBUHEM/action/replication_record"}},"created_at":"2026-05-17T23:38:53.233792+00:00","updated_at":"2026-05-17T23:38:53.233792+00:00"}