{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:L47BPRRKTA4UYTHJVTKSHITPJG","short_pith_number":"pith:L47BPRRK","schema_version":"1.0","canonical_sha256":"5f3e17c62a98394c4ce9acd523a26f4985c1fdf43294e29b3e0f521290df8a64","source":{"kind":"arxiv","id":"2605.15391","version":1},"attestation_state":"computed","paper":{"title":"PanoWorld: Geometry-Consistent Panoramic Video World Modeling","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"PanoWorld improves geometric consistency in panoramic videos by enforcing depth and trajectory constraints.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Bishoy Galoaa, Caleb James Lee, Edmund Yeh, Jennifer Dy, Le Jiang, Sarah Ostadabbas, Shayda Moezzi, Tooba Imtiaz, Xiangyu Bai, Yanzhi Wang","submitted_at":"2026-05-14T20:24:23Z","abstract_excerpt":"We present PanoWorld, a panoramic video world model that generates geometry-consistent 360$\\degree$ video from a single image and a caption. Existing panoramic video methods optimize primarily for visual realism and do not explicitly constrain the underlying 3D scene state, producing outputs that appear plausible yet exhibit inconsistent depth, broken correspondences, and implausible motion across the spherical surface. We address this gap by framing panoramic video generation as a geometry- and dynamics-consistent latent state modeling problem rather than pure visual synthesis. Building on a "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.15391","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T20:24:23Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"dade878fb12c476abe0a05e0820f658609943324088b39a72acfdd57efbae087","abstract_canon_sha256":"31c4172212f18d76e2cbc67b1dc53f5409693b7f9cfeba4305fc4f2f4d568712"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:56.201496Z","signature_b64":"t74r3y9oBEcscDR/O22g2CMo7x8Oyv0jJys/N67D7AiapEX2Pd5oq/9fixNf75njAYEL/cDPtdn+GjZ+rMaHAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5f3e17c62a98394c4ce9acd523a26f4985c1fdf43294e29b3e0f521290df8a64","last_reissued_at":"2026-05-20T00:00:56.200573Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:56.200573Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PanoWorld: Geometry-Consistent Panoramic Video World Modeling","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"PanoWorld improves geometric consistency in panoramic videos by enforcing depth and trajectory constraints.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Bishoy Galoaa, Caleb James Lee, Edmund Yeh, Jennifer Dy, Le Jiang, Sarah Ostadabbas, Shayda Moezzi, Tooba Imtiaz, Xiangyu Bai, Yanzhi Wang","submitted_at":"2026-05-14T20:24:23Z","abstract_excerpt":"We present PanoWorld, a panoramic video world model that generates geometry-consistent 360$\\degree$ video from a single image and a caption. Existing panoramic video methods optimize primarily for visual realism and do not explicitly constrain the underlying 3D scene state, producing outputs that appear plausible yet exhibit inconsistent depth, broken correspondences, and implausible motion across the spherical surface. We address this gap by framing panoramic video generation as a geometry- and dynamics-consistent latent state modeling problem rather than pure visual synthesis. Building on a "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"By framing panoramic video generation as geometry- and dynamics-consistent latent state modeling and introducing depth consistency loss against pseudo ground-truth panoramic depth plus trajectory consistency loss on 3D world-frame positions, PanoWorld improves geometric consistency over prior panoramic generation methods while maintaining competitive visual realism.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The pseudo ground-truth panoramic depth maps used for the depth consistency loss are accurate enough to enforce genuine 3D consistency without introducing systematic errors or artifacts into the generated video.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"PanoWorld adds depth consistency and trajectory consistency losses plus spherical adaptations to a pre-trained video model, plus a new PanoGeo dataset, to produce geometry-consistent 360 video.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"PanoWorld improves geometric consistency in panoramic videos by enforcing depth and trajectory constraints.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"8940f25daf32af98996f78c9356681127c09750a52436612cce1a7d412cda886"},"source":{"id":"2605.15391","kind":"arxiv","version":1},"verdict":{"id":"7d7b1aeb-68e6-4d6a-95d5-b8ef29d0131b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T15:37:19.171881Z","strongest_claim":"By framing panoramic video generation as geometry- and dynamics-consistent latent state modeling and introducing depth consistency loss against pseudo ground-truth panoramic depth plus trajectory consistency loss on 3D world-frame positions, PanoWorld improves geometric consistency over prior panoramic generation methods while maintaining competitive visual realism.","one_line_summary":"PanoWorld adds depth consistency and trajectory consistency losses plus spherical adaptations to a pre-trained video model, plus a new PanoGeo dataset, to produce geometry-consistent 360 video.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The pseudo ground-truth panoramic depth maps used for the depth consistency loss are accurate enough to enforce genuine 3D consistency without introducing systematic errors or artifacts into the generated video.","pith_extraction_headline":"PanoWorld improves geometric consistency in panoramic videos by enforcing depth and trajectory constraints."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15391/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T16:01:18.031393Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T15:53:37.073288Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"citation_quote_validity","ran_at":"2026-05-19T15:50:57.026447Z","status":"completed","version":"0.1.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T14:21:54.169583Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.722658Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"2f5fa581d382eac58793da7630fa1edd570e8a67f87337aa2a968d972f4e4aeb"},"references":{"count":30,"sample":[{"doi":"","year":null,"title":"Cosmos World Foundation Model Platform for Physical AI","work_id":"a2dba24c-318d-476a-8b21-4289c265810c","ref_index":1,"cited_arxiv_id":"2501.03575","is_internal_anchor":true},{"doi":"","year":null,"title":"Videophy: Evaluating physical commonsense for video generation","work_id":"27ed795c-abbe-4de1-9a7a-2ecf39c354f3","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Lumiere: A space-time diffusion model for video generation","work_id":"13a727c4-59d9-4ea4-acbe-3d7b8572737c","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Revisiting Feature Prediction for Learning Visual Representations from Video","work_id":"f7251dcf-5341-4915-bfe7-27812387b61a","ref_index":4,"cited_arxiv_id":"2404.08471","is_internal_anchor":true},{"doi":"","year":null,"title":"Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets","work_id":"4f68eada-27e3-437a-a2fe-6e4ca524d0d3","ref_index":5,"cited_arxiv_id":"2311.15127","is_internal_anchor":true}],"resolved_work":30,"snapshot_sha256":"4b6eef94cc700a7abd0348a7d799f2a0cddd8628c96ff1c01b0b342f2b5ab626","internal_anchors":12},"formal_canon":{"evidence_count":2,"snapshot_sha256":"32c4dae6e8cc94929aa675c47b90cd361805d12cef6abe345f96cb8dc43e1b77"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15391","created_at":"2026-05-20T00:00:56.200725+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15391v1","created_at":"2026-05-20T00:00:56.200725+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15391","created_at":"2026-05-20T00:00:56.200725+00:00"},{"alias_kind":"pith_short_12","alias_value":"L47BPRRKTA4U","created_at":"2026-05-20T00:00:56.200725+00:00"},{"alias_kind":"pith_short_16","alias_value":"L47BPRRKTA4UYTHJ","created_at":"2026-05-20T00:00:56.200725+00:00"},{"alias_kind":"pith_short_8","alias_value":"L47BPRRK","created_at":"2026-05-20T00:00:56.200725+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG","json":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG.json","graph_json":"https://pith.science/api/pith-number/L47BPRRKTA4UYTHJVTKSHITPJG/graph.json","events_json":"https://pith.science/api/pith-number/L47BPRRKTA4UYTHJVTKSHITPJG/events.json","paper":"https://pith.science/paper/L47BPRRK"},"agent_actions":{"view_html":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG","download_json":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG.json","view_paper":"https://pith.science/paper/L47BPRRK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15391&json=true","fetch_graph":"https://pith.science/api/pith-number/L47BPRRKTA4UYTHJVTKSHITPJG/graph.json","fetch_events":"https://pith.science/api/pith-number/L47BPRRKTA4UYTHJVTKSHITPJG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG/action/storage_attestation","attest_author":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG/action/author_attestation","sign_citation":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG/action/citation_signature","submit_replication":"https://pith.science/pith/L47BPRRKTA4UYTHJVTKSHITPJG/action/replication_record"}},"created_at":"2026-05-20T00:00:56.200725+00:00","updated_at":"2026-05-20T00:00:56.200725+00:00"}