{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:GHNBNG4UEWLWPVU2NRB2RO2EY3","short_pith_number":"pith:GHNBNG4U","canonical_record":{"source":{"id":"2605.15178","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T17:58:03Z","cross_cats_sorted":[],"title_canon_sha256":"ac7f02e2b426f77376b0a94085db9f39f21a7eac26efeb50d349b13165daa19a","abstract_canon_sha256":"eef6e0529d268c02237d963884733f3ac70a5b621928b74e9f4c12eaa73e4cd1"},"schema_version":"1.0"},"canonical_sha256":"31da169b94259767d69a6c43a8bb44c6d0d47bdb22323b83a8eea4144ede5b37","source":{"kind":"arxiv","id":"2605.15178","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15178","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15178v1","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"pith_short_12","alias_value":"GHNBNG4UEWLW","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"GHNBNG4UEWLWPVU2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"GHNBNG4U","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:GHNBNG4UEWLWPVU2NRB2RO2EY3","target":"record","payload":{"canonical_record":{"source":{"id":"2605.15178","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T17:58:03Z","cross_cats_sorted":[],"title_canon_sha256":"ac7f02e2b426f77376b0a94085db9f39f21a7eac26efeb50d349b13165daa19a","abstract_canon_sha256":"eef6e0529d268c02237d963884733f3ac70a5b621928b74e9f4c12eaa73e4cd1"},"schema_version":"1.0"},"canonical_sha256":"31da169b94259767d69a6c43a8bb44c6d0d47bdb22323b83a8eea4144ede5b37","receipt":{"kind":"pith_receipt","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.2","canonical_sha256":"31da169b94259767d69a6c43a8bb44c6d0d47bdb22323b83a8eea4144ede5b37","last_reissued_at":"2026-05-17T21:57:18.558439Z","signature_status":"unsigned_v0","first_computed_at":"2026-05-17T21:40:25.203865Z"},"source_kind":"arxiv","source_id":"2605.15178","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T21:18:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pI1X+aeu4TL3FrTRc3xpQFKNFXyGm9NEEQR5lejdexX5WHv/4IwOYrky44QerSKrytpxq/pKtEi1hSLydSieDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T05:30:47.341223Z"},"content_sha256":"e514e0a9896a9ff55c8e06090097adbb7602f629fe3eb6a3775a18ef4297af41","schema_version":"1.0","event_id":"sha256:e514e0a9896a9ff55c8e06090097adbb7602f629fe3eb6a3775a18ef4297af41"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:GHNBNG4UEWLWPVU2NRB2RO2EY3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SANA-WM: Efficient Minute-Scale World Modeling with Hybrid Linear Diffusion Transformer","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"SANA-WM generates minute-scale 720p videos with camera control at 36 times higher throughput than prior open-source models.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Enze Xie, Haoyi Zhu, Haozhe Liu, Jincheng Yu, Junsong Chen, Song Han, Tian Ye, Tong He, Yuyang Zhao","submitted_at":"2026-05-14T17:58:03Z","abstract_excerpt":"We introduce SANA-WM, an efficient 2.6B-parameter open-source world model natively trained for one-minute generation, synthesizing high-fidelity, 720p, minute-scale videos with precise camera control. SANA-WM achieves visual quality comparable to large-scale industrial baselines such as LingBot-World and HY-WorldPlay, while significantly improving efficiency. Four core designs drive our architecture: (1) Hybrid Linear Attention combines frame-wise Gated DeltaNet (GDN) with softmax attention for memory-efficient long-context modeling. (2) Dual-Branch Camera Control ensures precise 6-DoF traject"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"SANA-WM achieves visual quality comparable to large-scale industrial baselines such as LingBot-World and HY-WorldPlay, while significantly improving efficiency... On our one-minute world-model benchmark, SANA-WM demonstrates stronger action-following accuracy than prior open-source baselines and achieves comparable visual quality at 36× higher throughput for scalable world modeling.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The robust annotation pipeline extracts accurate metric-scale 6-DoF camera poses from public videos to yield high-quality, spatiotemporally consistent action labels that enable effective training of the world model.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SANA-WM is a 2.6B-parameter efficient world model that synthesizes minute-scale 720p videos with 6-DoF camera control, trained on 213K public clips in 15 days on 64 H100s and runnable on single GPUs at 36x higher throughput than prior open baselines.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"SANA-WM generates minute-scale 720p videos with camera control at 36 times higher throughput than prior open-source models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"55dd0e6b4c1d1b95062655e9933b87c866f4fab94d32b2de282676097a8c805f"},"source":{"id":"2605.15178","kind":"arxiv","version":1},"verdict":{"id":"f6f23a26-71f1-4e50-9451-a62bfc91b10d","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T13:52:18.797742Z","strongest_claim":"SANA-WM achieves visual quality comparable to large-scale industrial baselines such as LingBot-World and HY-WorldPlay, while significantly improving efficiency... On our one-minute world-model benchmark, SANA-WM demonstrates stronger action-following accuracy than prior open-source baselines and achieves comparable visual quality at 36× higher throughput for scalable world modeling.","one_line_summary":"SANA-WM is a 2.6B-parameter efficient world model that synthesizes minute-scale 720p videos with 6-DoF camera control, trained on 213K public clips in 15 days on 64 H100s and runnable on single GPUs at 36x higher throughput than prior open baselines.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The robust annotation pipeline extracts accurate metric-scale 6-DoF camera poses from public videos to yield high-quality, spatiotemporally consistent action labels that enable effective training of the world model.","pith_extraction_headline":"SANA-WM generates minute-scale 720p videos with camera control at 36 times higher throughput than prior open-source models."},"references":{"count":102,"sample":[{"doi":"","year":2018,"title":"World Models","work_id":"07227eee-8445-4c98-bce4-c6a6fd5ed907","ref_index":1,"cited_arxiv_id":"1803.10122","is_internal_anchor":true},{"doi":"","year":2025,"title":"Genie 3: A new frontier for world models","work_id":"94eb34fb-202e-47c8-bd7e-5e1c88ff88ae","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"GAIA-1: A Generative World Model for Autonomous Driving","work_id":"313484e6-a442-4522-8e19-d07e502844a8","ref_index":3,"cited_arxiv_id":"2309.17080","is_internal_anchor":true},{"doi":"","year":2026,"title":"DreamDojo: A Generalist Robot World Model from Large-Scale Human Videos","work_id":"95f2f415-c659-4084-a008-39303bea8638","ref_index":4,"cited_arxiv_id":"2602.06949","is_internal_anchor":true},{"doi":"","year":2025,"title":"Aether: Geometric-aware unified world modeling","work_id":"3aecc6ce-b828-4cf4-8553-7b25b9e2051c","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":102,"snapshot_sha256":"c904497025477ff38bfb7bb1e27b222f9c4bf279206dc36796e2646b766a77b4","internal_anchors":46},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"f6f23a26-71f1-4e50-9451-a62bfc91b10d"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T21:57:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8y9tuLo3u8K0IRKcdROSeG/M0LWAEZkqzwsbugy6DLh8R8iS2gZz897rr/M5aS6tvG88eGO6mG28bEXgNeAHBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T05:30:47.341782Z"},"content_sha256":"03ac14979a0cda6bc8e8681ab25161cb0a495932b11db8df55b8554805efdbf3","schema_version":"1.0","event_id":"sha256:03ac14979a0cda6bc8e8681ab25161cb0a495932b11db8df55b8554805efdbf3"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:GHNBNG4UEWLWPVU2NRB2RO2EY3","target":"integrity","payload":{"note":"URL 'https://deepmind.google/blog/genie-2-' returned status 404 (Not Found) at last check.","snippet":null,"arxiv_id":"2605.15178","detector":"external_links","evidence":{"url":"https://deepmind.google/blog/genie-2-","final_url":"https://deepmind.google/blog/genie-2-/","host_kind":"website","status_code":404,"status_text":"Not Found","verdict_class":"incontrovertible","checked_at_unix":1779190347.3567443},"severity":"advisory","ref_index":null,"audited_at":"2026-05-19T11:32:28.352287Z","event_type":"pith.integrity.v1","detected_doi":null,"detector_url":"https://pith.science/pith-integrity-protocol#external_links","external_url":"https://deepmind.google/blog/genie-2-","finding_type":"dead_url","evidence_hash":"6c84339f77343914bf080b3f7c5489fafb4699fb6f9593408b187bcbe9e842b9","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":1076,"payload_sha256":"6b256440bdfe3ed558ab746a1709a700c49018fbd2cc266ab93e3c673eb163af","signature_b64":"QVHlGVZF2UCEzWCUpvEVakS+C9mschcUtn0lcybgRPGVBMOWi0C4JqDP5rRjmyKeCrwegh25alEMpezS5mNyDw==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-19T11:37:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ka13mxdNkffvpfu+uiIZhh/UrfFWiU+vC5EoYgAaQkE3t4s3OZ3bQ4aRr9LmDcSF0BORdwIOegzmHTnp2okwAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T05:30:47.342668Z"},"content_sha256":"1ad87288d5eff31cc79aebc69197b1a63cb93bff65b8a300e5203158a64a0c84","schema_version":"1.0","event_id":"sha256:1ad87288d5eff31cc79aebc69197b1a63cb93bff65b8a300e5203158a64a0c84"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:GHNBNG4UEWLWPVU2NRB2RO2EY3","target":"integrity","payload":{"note":"URL 'https://blog.google/innovation-and-ai/products/nano-' returned status 404 (Not Found) at last check.","snippet":null,"arxiv_id":"2605.15178","detector":"external_links","evidence":{"url":"https://blog.google/innovation-and-ai/products/nano-","final_url":"https://blog.google/innovation-and-ai/products/nano-/","host_kind":"website","status_code":404,"status_text":"Not Found","verdict_class":"incontrovertible","checked_at_unix":1779190345.802933},"severity":"advisory","ref_index":null,"audited_at":"2026-05-19T11:32:28.352287Z","event_type":"pith.integrity.v1","detected_doi":null,"detector_url":"https://pith.science/pith-integrity-protocol#external_links","external_url":"https://blog.google/innovation-and-ai/products/nano-","finding_type":"dead_url","evidence_hash":"ca680c7df8b0e8b7e73f93ce57d2e9c40758c19ea79c913408bc08d9c7603aae","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":1075,"payload_sha256":"1fb870c25c0b4335d828618592aed30bc773f4a774a8d75ec5b0a34d6f5c8c8a","signature_b64":"agfSpR08PBmmhhZ/I7PE8LscZn24eIvjjYZVn8QlWdWJujoTehtN9FpkKLHZ/UMDE7tAs81XcM5CW88LRgQFCQ==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-19T11:37:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/NF27mkp9pSxWBExFaIHiaxFvQoasJzvYXtoJWGt2wnAHpSs2f0lhkcwdNfjAu/+ImD9Q2L+hu7/3y7xQcKrCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T05:30:47.342964Z"},"content_sha256":"4c134ca032d7128d3403c36739c5cc65b76f94a256686506dfef48af848efea2","schema_version":"1.0","event_id":"sha256:4c134ca032d7128d3403c36739c5cc65b76f94a256686506dfef48af848efea2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3/bundle.json","state_url":"https://pith.science/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T05:30:47Z","links":{"resolver":"https://pith.science/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3","bundle":"https://pith.science/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3/bundle.json","state":"https://pith.science/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GHNBNG4UEWLWPVU2NRB2RO2EY3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GHNBNG4UEWLWPVU2NRB2RO2EY3","merge_version":"pith-open-graph-merge-v1","event_count":4,"valid_event_count":4,"invalid_event_count":0,"equivocation_count":1,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"eef6e0529d268c02237d963884733f3ac70a5b621928b74e9f4c12eaa73e4cd1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T17:58:03Z","title_canon_sha256":"ac7f02e2b426f77376b0a94085db9f39f21a7eac26efeb50d349b13165daa19a"},"schema_version":"1.0","source":{"id":"2605.15178","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15178","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15178v1","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"pith_short_12","alias_value":"GHNBNG4UEWLW","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"GHNBNG4UEWLWPVU2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"GHNBNG4U","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:03ac14979a0cda6bc8e8681ab25161cb0a495932b11db8df55b8554805efdbf3","target":"graph","created_at":"2026-05-17T21:57:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"SANA-WM achieves visual quality comparable to large-scale industrial baselines such as LingBot-World and HY-WorldPlay, while significantly improving efficiency... On our one-minute world-model benchmark, SANA-WM demonstrates stronger action-following accuracy than prior open-source baselines and achieves comparable visual quality at 36× higher throughput for scalable world modeling."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The robust annotation pipeline extracts accurate metric-scale 6-DoF camera poses from public videos to yield high-quality, spatiotemporally consistent action labels that enable effective training of the world model."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SANA-WM is a 2.6B-parameter efficient world model that synthesizes minute-scale 720p videos with 6-DoF camera control, trained on 213K public clips in 15 days on 64 H100s and runnable on single GPUs at 36x higher throughput than prior open baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"SANA-WM generates minute-scale 720p videos with camera control at 36 times higher throughput than prior open-source models."}],"snapshot_sha256":"55dd0e6b4c1d1b95062655e9933b87c866f4fab94d32b2de282676097a8c805f"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce SANA-WM, an efficient 2.6B-parameter open-source world model natively trained for one-minute generation, synthesizing high-fidelity, 720p, minute-scale videos with precise camera control. SANA-WM achieves visual quality comparable to large-scale industrial baselines such as LingBot-World and HY-WorldPlay, while significantly improving efficiency. Four core designs drive our architecture: (1) Hybrid Linear Attention combines frame-wise Gated DeltaNet (GDN) with softmax attention for memory-efficient long-context modeling. (2) Dual-Branch Camera Control ensures precise 6-DoF traject","authors_text":"Enze Xie, Haoyi Zhu, Haozhe Liu, Jincheng Yu, Junsong Chen, Song Han, Tian Ye, Tong He, Yuyang Zhao","cross_cats":[],"headline":"SANA-WM generates minute-scale 720p videos with camera control at 36 times higher throughput than prior open-source models.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T17:58:03Z","title":"SANA-WM: Efficient Minute-Scale World Modeling with Hybrid Linear Diffusion Transformer"},"references":{"count":102,"internal_anchors":46,"resolved_work":102,"sample":[{"cited_arxiv_id":"1803.10122","doi":"","is_internal_anchor":true,"ref_index":1,"title":"World Models","work_id":"07227eee-8445-4c98-bce4-c6a6fd5ed907","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Genie 3: A new frontier for world models","work_id":"94eb34fb-202e-47c8-bd7e-5e1c88ff88ae","year":2025},{"cited_arxiv_id":"2309.17080","doi":"","is_internal_anchor":true,"ref_index":3,"title":"GAIA-1: A Generative World Model for Autonomous Driving","work_id":"313484e6-a442-4522-8e19-d07e502844a8","year":2023},{"cited_arxiv_id":"2602.06949","doi":"","is_internal_anchor":true,"ref_index":4,"title":"DreamDojo: A Generalist Robot World Model from Large-Scale Human Videos","work_id":"95f2f415-c659-4084-a008-39303bea8638","year":2026},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Aether: Geometric-aware unified world modeling","work_id":"3aecc6ce-b828-4cf4-8553-7b25b9e2051c","year":2025}],"snapshot_sha256":"c904497025477ff38bfb7bb1e27b222f9c4bf279206dc36796e2646b766a77b4"},"source":{"id":"2605.15178","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T13:52:18.797742Z","id":"f6f23a26-71f1-4e50-9451-a62bfc91b10d","model_set":{"reader":"grok-4.3"},"one_line_summary":"SANA-WM is a 2.6B-parameter efficient world model that synthesizes minute-scale 720p videos with 6-DoF camera control, trained on 213K public clips in 15 days on 64 H100s and runnable on single GPUs at 36x higher throughput than prior open baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"SANA-WM generates minute-scale 720p videos with camera control at 36 times higher throughput than prior open-source models.","strongest_claim":"SANA-WM achieves visual quality comparable to large-scale industrial baselines such as LingBot-World and HY-WorldPlay, while significantly improving efficiency... On our one-minute world-model benchmark, SANA-WM demonstrates stronger action-following accuracy than prior open-source baselines and achieves comparable visual quality at 36× higher throughput for scalable world modeling.","weakest_assumption":"The robust annotation pipeline extracts accurate metric-scale 6-DoF camera poses from public videos to yield high-quality, spatiotemporally consistent action labels that enable effective training of the world model."}},"verdict_id":"f6f23a26-71f1-4e50-9451-a62bfc91b10d"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e514e0a9896a9ff55c8e06090097adbb7602f629fe3eb6a3775a18ef4297af41","target":"record","created_at":"2026-05-17T21:18:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"eef6e0529d268c02237d963884733f3ac70a5b621928b74e9f4c12eaa73e4cd1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T17:58:03Z","title_canon_sha256":"ac7f02e2b426f77376b0a94085db9f39f21a7eac26efeb50d349b13165daa19a"},"schema_version":"1.0","source":{"id":"2605.15178","kind":"arxiv","version":1}},"canonical_sha256":"31da169b94259767d69a6c43a8bb44c6d0d47bdb22323b83a8eea4144ede5b37","receipt":{"builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"31da169b94259767d69a6c43a8bb44c6d0d47bdb22323b83a8eea4144ede5b37","first_computed_at":"2026-05-17T21:40:25.203865Z","kind":"pith_receipt","last_reissued_at":"2026-05-17T21:57:18.558439Z","receipt_version":"0.2","signature_status":"unsigned_v0"},"source_id":"2605.15178","source_kind":"arxiv","source_version":1}}},"equivocations":[{"signer_id":"pith.science","event_type":"integrity_finding","target":"integrity","event_ids":["sha256:1ad87288d5eff31cc79aebc69197b1a63cb93bff65b8a300e5203158a64a0c84","sha256:4c134ca032d7128d3403c36739c5cc65b76f94a256686506dfef48af848efea2"]}],"invalid_events":[],"applied_event_ids":["sha256:e514e0a9896a9ff55c8e06090097adbb7602f629fe3eb6a3775a18ef4297af41","sha256:03ac14979a0cda6bc8e8681ab25161cb0a495932b11db8df55b8554805efdbf3"],"state_sha256":"c078e1a27d10c777b1c9a937cd263a9b1a8c670afdde92e591181aaf76303030"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mB/pjT6Z9t/260hUGlDsMI/LYBqDQqUgsmRvD7vU65A1WIJ0lChqYWY+jorBj8QN/IWRFcxLXVlsoXwc3YkDBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T05:30:47.345645Z","bundle_sha256":"9cf1030d12271365ba79877fd6c7e309d75478b4d8609a636af23a176357c0d1"}}