{"work":{"id":"0c843c6a-ce05-4ca4-992b-e25516f490db","openalex_id":null,"doi":null,"arxiv_id":"2401.00025","raw_key":null,"title":"Any-point Trajectory Modeling for Policy Learning","authors":null,"authors_text":"Chuan Wen, Xingyu Lin, John So, Kai Chen, Qi Dou, Yang Gao","year":2023,"venue":"cs.RO","abstract":"Learning from demonstration is a powerful method for teaching robots new skills, and having more demonstration data often improves policy learning. However, the high cost of collecting demonstration data is a significant bottleneck. Videos, as a rich data source, contain knowledge of behaviors, physics, and semantics, but extracting control-specific information from them is challenging due to the lack of action labels. In this work, we introduce a novel framework, Any-point Trajectory Modeling (ATM), that utilizes video demonstrations by pre-training a trajectory model to predict future trajectories of arbitrary points within a video frame. Once trained, these trajectories provide detailed control guidance, enabling the learning of robust visuomotor policies with minimal action-labeled data. Across over 130 language-conditioned tasks we evaluated in both simulation and the real world, ATM outperforms strong video pre-training baselines by 80% on average. Furthermore, we show effective transfer learning of manipulation skills from human videos and videos from a different robot morphology. \nVisualizations and code are available at: \\url{https://xingyu-lin.github.io/atm}.","external_url":"https://arxiv.org/abs/2401.00025","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-23T07:02:41.482136+00:00","pith_arxiv_id":"2401.00025","created_at":"2026-05-10T09:48:48.385875+00:00","updated_at":"2026-05-23T07:02:41.482136+00:00","title_quality_ok":true,"display_title":"Any-point Trajectory Modeling for Policy Learning","render_title":"Any-point Trajectory Modeling for Policy Learning"},"hub":{"state":{"work_id":"0c843c6a-ce05-4ca4-992b-e25516f490db","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":30,"external_cited_by_count":null,"distinct_field_count":3,"first_pith_cited_at":"2024-09-03T06:45:22+00:00","last_pith_cited_at":"2026-05-21T17:58:26+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-24T01:44:42.750409+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":9}],"polarity_counts":[{"context_polarity":"background","n":9}],"runs":{},"summary":{},"graph":{},"authors":[]}}