{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:WGXXCQVE7NTFNS5JR3WHVZ5IFY","short_pith_number":"pith:WGXXCQVE","schema_version":"1.0","canonical_sha256":"b1af7142a4fb6656cba98eec7ae7a82e1fb4c90b90bb98b47402a2ece13db640","source":{"kind":"arxiv","id":"1605.07157","version":4},"attestation_state":"computed","paper":{"title":"Unsupervised Learning for Physical Interaction through Video Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.RO"],"primary_cat":"cs.LG","authors_text":"Chelsea Finn, Ian Goodfellow, Sergey Levine","submitted_at":"2016-05-23T19:45:55Z","abstract_excerpt":"A core challenge for an agent learning to interact with the world is to predict how its actions affect objects in its environment. Many existing methods for learning the dynamics of physical interactions require labeled object information. However, to scale real-world interaction learning to a variety of scenes and objects, acquiring labeled data becomes increasingly impractical. To learn about physical object motion without labels, we develop an action-conditioned video prediction model that explicitly models pixel motion, by predicting a distribution over pixel motion from previous frames. B"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1605.07157","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-23T19:45:55Z","cross_cats_sorted":["cs.AI","cs.CV","cs.RO"],"title_canon_sha256":"d06f9a5db47fc974348ebaba4306cd139f57aec08e8cafd930b88c925d7876d6","abstract_canon_sha256":"066547be1fef29b05e1d512ba02a388da349991c7d5c26712e7acaf73c901d71"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:02:04.223990Z","signature_b64":"YjnXicmo65JQCotZaRsKuUDuU4qDg4jXQUUi3yOok6PcNqoGP1LFfDzkG6MAtukPuJc7GvKP/KuORQclCyt7BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b1af7142a4fb6656cba98eec7ae7a82e1fb4c90b90bb98b47402a2ece13db640","last_reissued_at":"2026-05-18T01:02:04.223371Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:02:04.223371Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Unsupervised Learning for Physical Interaction through Video Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.RO"],"primary_cat":"cs.LG","authors_text":"Chelsea Finn, Ian Goodfellow, Sergey Levine","submitted_at":"2016-05-23T19:45:55Z","abstract_excerpt":"A core challenge for an agent learning to interact with the world is to predict how its actions affect objects in its environment. Many existing methods for learning the dynamics of physical interactions require labeled object information. However, to scale real-world interaction learning to a variety of scenes and objects, acquiring labeled data becomes increasingly impractical. To learn about physical object motion without labels, we develop an action-conditioned video prediction model that explicitly models pixel motion, by predicting a distribution over pixel motion from previous frames. B"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.07157","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1605.07157","created_at":"2026-05-18T01:02:04.223453+00:00"},{"alias_kind":"arxiv_version","alias_value":"1605.07157v4","created_at":"2026-05-18T01:02:04.223453+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.07157","created_at":"2026-05-18T01:02:04.223453+00:00"},{"alias_kind":"pith_short_12","alias_value":"WGXXCQVE7NTF","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_16","alias_value":"WGXXCQVE7NTFNS5J","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_8","alias_value":"WGXXCQVE","created_at":"2026-05-18T12:30:48.956258+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1907.06143","citing_title":"Neural Embedding for Physical Manipulations","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2304.11193","citing_title":"Multi-Modal World Model for Physical Robot Interactions: Simultaneous Visual and Tactile Predictions for Enhanced Accuracy","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2410.05882","citing_title":"Frame forecasting in cine MRI using the PCA respiratory motion model: comparing recurrent neural networks trained online and transformers","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12090","citing_title":"World Action Models: The Next Frontier in Embodied AI","ref_index":281,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY","json":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY.json","graph_json":"https://pith.science/api/pith-number/WGXXCQVE7NTFNS5JR3WHVZ5IFY/graph.json","events_json":"https://pith.science/api/pith-number/WGXXCQVE7NTFNS5JR3WHVZ5IFY/events.json","paper":"https://pith.science/paper/WGXXCQVE"},"agent_actions":{"view_html":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY","download_json":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY.json","view_paper":"https://pith.science/paper/WGXXCQVE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1605.07157&json=true","fetch_graph":"https://pith.science/api/pith-number/WGXXCQVE7NTFNS5JR3WHVZ5IFY/graph.json","fetch_events":"https://pith.science/api/pith-number/WGXXCQVE7NTFNS5JR3WHVZ5IFY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY/action/storage_attestation","attest_author":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY/action/author_attestation","sign_citation":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY/action/citation_signature","submit_replication":"https://pith.science/pith/WGXXCQVE7NTFNS5JR3WHVZ5IFY/action/replication_record"}},"created_at":"2026-05-18T01:02:04.223453+00:00","updated_at":"2026-05-18T01:02:04.223453+00:00"}