{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:MZD3A5FRLSZKM6ZJII7T6GQ7GA","short_pith_number":"pith:MZD3A5FR","schema_version":"1.0","canonical_sha256":"6647b074b15cb2a67b29423f3f1a1f303e2312c02a5db05c64a75b51328b1b4c","source":{"kind":"arxiv","id":"1507.08750","version":2},"attestation_state":"computed","paper":{"title":"Action-Conditional Video Prediction using Deep Networks in Atari Games","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV"],"primary_cat":"cs.LG","authors_text":"Honglak Lee, Junhyuk Oh, Richard Lewis, Satinder Singh, Xiaoxiao Guo","submitted_at":"2015-07-31T04:43:30Z","abstract_excerpt":"Motivated by vision-based reinforcement learning (RL) problems, in particular Atari games from the recent benchmark Aracade Learning Environment (ALE), we consider spatio-temporal prediction problems where future (image-)frames are dependent on control variables or actions as well as previous frames. While not composed of natural scenes, frames in Atari games are high-dimensional in size, can involve tens of objects with one or more objects being controlled by the actions directly and many other objects being influenced indirectly, can involve entry and departure of objects, and can involve de"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1507.08750","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-07-31T04:43:30Z","cross_cats_sorted":["cs.AI","cs.CV"],"title_canon_sha256":"8328896a2d7bdc7331486b8b9c8adfca5b06d7a00d8204008807501fd2d54a7a","abstract_canon_sha256":"650d8b12842371f8c1081291da80f5d83bc330e95f85b635de127a7e24a4ce0d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:23:55.927732Z","signature_b64":"JpBX1oVhjzN+7oKamr87NVbg4t95tGg6QXzPGVUy1/jLanEEh2SaQQxd2wHiGo9mwFOG7yjSMfIKE/K8SkndAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6647b074b15cb2a67b29423f3f1a1f303e2312c02a5db05c64a75b51328b1b4c","last_reissued_at":"2026-05-18T01:23:55.927111Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:23:55.927111Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Action-Conditional Video Prediction using Deep Networks in Atari Games","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV"],"primary_cat":"cs.LG","authors_text":"Honglak Lee, Junhyuk Oh, Richard Lewis, Satinder Singh, Xiaoxiao Guo","submitted_at":"2015-07-31T04:43:30Z","abstract_excerpt":"Motivated by vision-based reinforcement learning (RL) problems, in particular Atari games from the recent benchmark Aracade Learning Environment (ALE), we consider spatio-temporal prediction problems where future (image-)frames are dependent on control variables or actions as well as previous frames. While not composed of natural scenes, frames in Atari games are high-dimensional in size, can involve tens of objects with one or more objects being controlled by the actions directly and many other objects being influenced indirectly, can involve entry and departure of objects, and can involve de"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.08750","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1507.08750","created_at":"2026-05-18T01:23:55.927221+00:00"},{"alias_kind":"arxiv_version","alias_value":"1507.08750v2","created_at":"2026-05-18T01:23:55.927221+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.08750","created_at":"2026-05-18T01:23:55.927221+00:00"},{"alias_kind":"pith_short_12","alias_value":"MZD3A5FRLSZK","created_at":"2026-05-18T12:29:32.376354+00:00"},{"alias_kind":"pith_short_16","alias_value":"MZD3A5FRLSZKM6ZJ","created_at":"2026-05-18T12:29:32.376354+00:00"},{"alias_kind":"pith_short_8","alias_value":"MZD3A5FR","created_at":"2026-05-18T12:29:32.376354+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1907.06143","citing_title":"Neural Embedding for Physical Manipulations","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2512.15692","citing_title":"mimic-video: Video-Action Models for Generalizable Robot Control Beyond VLAs","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12090","citing_title":"World Action Models: The Next Frontier in Embodied AI","ref_index":280,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA","json":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA.json","graph_json":"https://pith.science/api/pith-number/MZD3A5FRLSZKM6ZJII7T6GQ7GA/graph.json","events_json":"https://pith.science/api/pith-number/MZD3A5FRLSZKM6ZJII7T6GQ7GA/events.json","paper":"https://pith.science/paper/MZD3A5FR"},"agent_actions":{"view_html":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA","download_json":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA.json","view_paper":"https://pith.science/paper/MZD3A5FR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1507.08750&json=true","fetch_graph":"https://pith.science/api/pith-number/MZD3A5FRLSZKM6ZJII7T6GQ7GA/graph.json","fetch_events":"https://pith.science/api/pith-number/MZD3A5FRLSZKM6ZJII7T6GQ7GA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA/action/storage_attestation","attest_author":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA/action/author_attestation","sign_citation":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA/action/citation_signature","submit_replication":"https://pith.science/pith/MZD3A5FRLSZKM6ZJII7T6GQ7GA/action/replication_record"}},"created_at":"2026-05-18T01:23:55.927221+00:00","updated_at":"2026-05-18T01:23:55.927221+00:00"}