{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:VRTPAE72LFNVS4OKOBTAZG5TUS","short_pith_number":"pith:VRTPAE72","schema_version":"1.0","canonical_sha256":"ac66f013fa595b5971ca70660c9bb3a49dab515744f6a2d1ed169df00a06d74c","source":{"kind":"arxiv","id":"2606.31388","version":1},"attestation_state":"computed","paper":{"title":"One Video, One World: Turning Monocular Video into Physical 4D Scenes","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Boran Zhang, Congcong Zhu, Hao Zhao, Henghaofan Zhang, Junhao Chen, Mingjin Chen, Ruqi Huang, Saining Zhang, Yufei Wang, Zhihao Li","submitted_at":"2026-06-30T09:16:21Z","abstract_excerpt":"We introduce \\textbf{OVOW}, the first training-free system that reconstructs \\emph{instance-level, simulation-ready} 4D mesh scenes from a single monocular video. Recent 4D reconstruction achieves impressive rendering quality, but its outputs (\\eg, implicit fields, Gaussian primitives, or point clouds) lack the watertight topology, instance separation, and standardized physical interfaces required by physics simulators and embodied AI. OVOW closes this gap with a four-stage pipeline: a vision-language model discovers, labels, and motion-classifies all instances; category-aware reconstruction y"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.31388","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T09:16:21Z","cross_cats_sorted":[],"title_canon_sha256":"a42a3ef1f1ce00e61cdae37ad6b24fd8b5c2cce193d6f5a95702ef3e0a876d64","abstract_canon_sha256":"05675537a2619ac2c0448b1917b20e1de3c648fe3136fc6ac969ec69c2525699"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:18:01.600711Z","signature_b64":"VxG5BqY6bH5pIoRU1qsdOrgN9N2coX2ljwl3cZzXmlFdaz022wj2qFjgE38jXSmRRBogp/AWj8TXao4R1nq8AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ac66f013fa595b5971ca70660c9bb3a49dab515744f6a2d1ed169df00a06d74c","last_reissued_at":"2026-07-01T01:18:01.600306Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:18:01.600306Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"One Video, One World: Turning Monocular Video into Physical 4D Scenes","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Boran Zhang, Congcong Zhu, Hao Zhao, Henghaofan Zhang, Junhao Chen, Mingjin Chen, Ruqi Huang, Saining Zhang, Yufei Wang, Zhihao Li","submitted_at":"2026-06-30T09:16:21Z","abstract_excerpt":"We introduce \\textbf{OVOW}, the first training-free system that reconstructs \\emph{instance-level, simulation-ready} 4D mesh scenes from a single monocular video. Recent 4D reconstruction achieves impressive rendering quality, but its outputs (\\eg, implicit fields, Gaussian primitives, or point clouds) lack the watertight topology, instance separation, and standardized physical interfaces required by physics simulators and embodied AI. OVOW closes this gap with a four-stage pipeline: a vision-language model discovers, labels, and motion-classifies all instances; category-aware reconstruction y"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.31388","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.31388/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.31388","created_at":"2026-07-01T01:18:01.600366+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.31388v1","created_at":"2026-07-01T01:18:01.600366+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.31388","created_at":"2026-07-01T01:18:01.600366+00:00"},{"alias_kind":"pith_short_12","alias_value":"VRTPAE72LFNV","created_at":"2026-07-01T01:18:01.600366+00:00"},{"alias_kind":"pith_short_16","alias_value":"VRTPAE72LFNVS4OK","created_at":"2026-07-01T01:18:01.600366+00:00"},{"alias_kind":"pith_short_8","alias_value":"VRTPAE72","created_at":"2026-07-01T01:18:01.600366+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS","json":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS.json","graph_json":"https://pith.science/api/pith-number/VRTPAE72LFNVS4OKOBTAZG5TUS/graph.json","events_json":"https://pith.science/api/pith-number/VRTPAE72LFNVS4OKOBTAZG5TUS/events.json","paper":"https://pith.science/paper/VRTPAE72"},"agent_actions":{"view_html":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS","download_json":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS.json","view_paper":"https://pith.science/paper/VRTPAE72","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.31388&json=true","fetch_graph":"https://pith.science/api/pith-number/VRTPAE72LFNVS4OKOBTAZG5TUS/graph.json","fetch_events":"https://pith.science/api/pith-number/VRTPAE72LFNVS4OKOBTAZG5TUS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS/action/storage_attestation","attest_author":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS/action/author_attestation","sign_citation":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS/action/citation_signature","submit_replication":"https://pith.science/pith/VRTPAE72LFNVS4OKOBTAZG5TUS/action/replication_record"}},"created_at":"2026-07-01T01:18:01.600366+00:00","updated_at":"2026-07-01T01:18:01.600366+00:00"}