{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:VPQAMYOBC7TAH3WUL7CG6C7TDV","short_pith_number":"pith:VPQAMYOB","schema_version":"1.0","canonical_sha256":"abe00661c117e603eed45fc46f0bf31d63b701d4f84a1468e5094594c554e71c","source":{"kind":"arxiv","id":"1710.11252","version":2},"attestation_state":"computed","paper":{"title":"Stochastic Variational Video Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Chelsea Finn, Dumitru Erhan, Mohammad Babaeizadeh, Roy H. Campbell, Sergey Levine","submitted_at":"2017-10-30T21:48:54Z","abstract_excerpt":"Predicting the future in real-world settings, particularly from raw sensory observations such as images, is exceptionally challenging. Real-world events can be stochastic and unpredictable, and the high dimensionality and complexity of natural images requires the predictive model to build an intricate understanding of the natural world. Many existing methods tackle this problem by making simplifying assumptions about the environment. One common assumption is that the outcome is deterministic and there is only one plausible future. This can lead to low-quality predictions in real-world settings"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1710.11252","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-10-30T21:48:54Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"3d7d316e86de92992e782ff5c43b9cafa6a20c056592bdce4dab58835e893f48","abstract_canon_sha256":"dc5cc0c638aff809daedef8f4fbd61173ab5a1c70b4d910cf0ab3f75a094e04e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:21:54.320832Z","signature_b64":"yZHXbn4A51XKERzylZzfYg9VRMItMO+cscNI/1EazENo6QgrPxZ3j6pax7mAkpD/sdI1QYGAZfwM4/GmhndCAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"abe00661c117e603eed45fc46f0bf31d63b701d4f84a1468e5094594c554e71c","last_reissued_at":"2026-05-18T00:21:54.320261Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:21:54.320261Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Stochastic Variational Video Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Chelsea Finn, Dumitru Erhan, Mohammad Babaeizadeh, Roy H. Campbell, Sergey Levine","submitted_at":"2017-10-30T21:48:54Z","abstract_excerpt":"Predicting the future in real-world settings, particularly from raw sensory observations such as images, is exceptionally challenging. Real-world events can be stochastic and unpredictable, and the high dimensionality and complexity of natural images requires the predictive model to build an intricate understanding of the natural world. Many existing methods tackle this problem by making simplifying assumptions about the environment. One common assumption is that the outcome is deterministic and there is only one plausible future. This can lead to low-quality predictions in real-world settings"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11252","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1710.11252","created_at":"2026-05-18T00:21:54.320367+00:00"},{"alias_kind":"arxiv_version","alias_value":"1710.11252v2","created_at":"2026-05-18T00:21:54.320367+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11252","created_at":"2026-05-18T00:21:54.320367+00:00"},{"alias_kind":"pith_short_12","alias_value":"VPQAMYOBC7TA","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_16","alias_value":"VPQAMYOBC7TAH3WU","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_8","alias_value":"VPQAMYOB","created_at":"2026-05-18T12:31:49.984773+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":11,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2605.23856","citing_title":"Point Tracking Improves World Action Models","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"1907.08845","citing_title":"Order Matters: Shuffling Sequence Generation for Video Prediction","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2304.11193","citing_title":"Multi-Modal World Model for Physical Robot Interactions: Simultaneous Visual and Tactile Predictions for Enhanced Accuracy","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2410.05882","citing_title":"Frame forecasting in cine MRI using the PCA respiratory motion model: comparing recurrent neural networks trained online and transformers","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2308.08089","citing_title":"DragNUWA: Fine-grained Control in Video Generation by Integrating Text, Image, and Trajectory","ref_index":71,"is_internal_anchor":true},{"citing_arxiv_id":"2010.02193","citing_title":"Mastering Atari with Discrete World Models","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2104.10157","citing_title":"VideoGPT: Video Generation using VQ-VAE and Transformers","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2204.03458","citing_title":"Video Diffusion Models","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12090","citing_title":"World Action Models: The Next Frontier in Embodied AI","ref_index":283,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01517","citing_title":"VAnim: Rendering-Aware Sparse State Modeling for Structure-Preserving Vector Animation","ref_index":216,"is_internal_anchor":false},{"citing_arxiv_id":"2210.02303","citing_title":"Imagen Video: High Definition Video Generation with Diffusion Models","ref_index":1,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV","json":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV.json","graph_json":"https://pith.science/api/pith-number/VPQAMYOBC7TAH3WUL7CG6C7TDV/graph.json","events_json":"https://pith.science/api/pith-number/VPQAMYOBC7TAH3WUL7CG6C7TDV/events.json","paper":"https://pith.science/paper/VPQAMYOB"},"agent_actions":{"view_html":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV","download_json":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV.json","view_paper":"https://pith.science/paper/VPQAMYOB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1710.11252&json=true","fetch_graph":"https://pith.science/api/pith-number/VPQAMYOBC7TAH3WUL7CG6C7TDV/graph.json","fetch_events":"https://pith.science/api/pith-number/VPQAMYOBC7TAH3WUL7CG6C7TDV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV/action/storage_attestation","attest_author":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV/action/author_attestation","sign_citation":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV/action/citation_signature","submit_replication":"https://pith.science/pith/VPQAMYOBC7TAH3WUL7CG6C7TDV/action/replication_record"}},"created_at":"2026-05-18T00:21:54.320367+00:00","updated_at":"2026-05-18T00:21:54.320367+00:00"}