{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ELVWJ5EH4AJ2ZHGPJFJM7YSTWK","short_pith_number":"pith:ELVWJ5EH","schema_version":"1.0","canonical_sha256":"22eb64f487e013ac9ccf4952cfe253b29e13a09def041035930ce6bfd415580d","source":{"kind":"arxiv","id":"1806.09655","version":2},"attestation_state":"computed","paper":{"title":"Learning what you can do before doing anything","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Andrew Jaegle, Karl Pertsch, Konstantinos G. Derpanis, Kostas Daniilidis, Oleh Rybkin","submitted_at":"2018-06-25T18:33:34Z","abstract_excerpt":"Intelligent agents can learn to represent the action spaces of other agents simply by observing them act. Such representations help agents quickly learn to predict the effects of their own actions on the environment and to plan complex action sequences. In this work, we address the problem of learning an agent's action space purely from visual observation. We use stochastic video prediction to learn a latent variable that captures the scene's dynamics while being minimally sensitive to the scene's static content. We introduce a loss term that encourages the network to capture the composability"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1806.09655","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-25T18:33:34Z","cross_cats_sorted":["cs.CV","stat.ML"],"title_canon_sha256":"9873273a8d4347b1fb59d13c06defef919a615daa0f5baf299a06b6da59926af","abstract_canon_sha256":"018d45602294815da310093b829f7fdc422583f2e3b47b549e8d410290017f2f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:16.394690Z","signature_b64":"e4SXuiqmpO8mysK9UoZNi2l4h7sC9rEk+vxdI/yGBIyzdfIpL3PSpDPkrInEFpwAcCYw74VH7mOTZKS+qtt9DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"22eb64f487e013ac9ccf4952cfe253b29e13a09def041035930ce6bfd415580d","last_reissued_at":"2026-05-17T23:54:16.394209Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:16.394209Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning what you can do before doing anything","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Andrew Jaegle, Karl Pertsch, Konstantinos G. Derpanis, Kostas Daniilidis, Oleh Rybkin","submitted_at":"2018-06-25T18:33:34Z","abstract_excerpt":"Intelligent agents can learn to represent the action spaces of other agents simply by observing them act. Such representations help agents quickly learn to predict the effects of their own actions on the environment and to plan complex action sequences. In this work, we address the problem of learning an agent's action space purely from visual observation. We use stochastic video prediction to learn a latent variable that captures the scene's dynamics while being minimally sensitive to the scene's static content. We introduce a loss term that encourages the network to capture the composability"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.09655","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.09655","created_at":"2026-05-17T23:54:16.394287+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.09655v2","created_at":"2026-05-17T23:54:16.394287+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.09655","created_at":"2026-05-17T23:54:16.394287+00:00"},{"alias_kind":"pith_short_12","alias_value":"ELVWJ5EH4AJ2","created_at":"2026-05-18T12:32:22.470017+00:00"},{"alias_kind":"pith_short_16","alias_value":"ELVWJ5EH4AJ2ZHGP","created_at":"2026-05-18T12:32:22.470017+00:00"},{"alias_kind":"pith_short_8","alias_value":"ELVWJ5EH","created_at":"2026-05-18T12:32:22.470017+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2309.16797","citing_title":"Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution","ref_index":279,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04974","citing_title":"From Video to Control: A Survey of Learning Manipulation Interfaces from Temporal Visual Data","ref_index":78,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK","json":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK.json","graph_json":"https://pith.science/api/pith-number/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/graph.json","events_json":"https://pith.science/api/pith-number/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/events.json","paper":"https://pith.science/paper/ELVWJ5EH"},"agent_actions":{"view_html":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK","download_json":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK.json","view_paper":"https://pith.science/paper/ELVWJ5EH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.09655&json=true","fetch_graph":"https://pith.science/api/pith-number/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/graph.json","fetch_events":"https://pith.science/api/pith-number/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/action/storage_attestation","attest_author":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/action/author_attestation","sign_citation":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/action/citation_signature","submit_replication":"https://pith.science/pith/ELVWJ5EH4AJ2ZHGPJFJM7YSTWK/action/replication_record"}},"created_at":"2026-05-17T23:54:16.394287+00:00","updated_at":"2026-05-17T23:54:16.394287+00:00"}