{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:SIX5YOXPJ4365HYDQHPLXON7WA","short_pith_number":"pith:SIX5YOXP","schema_version":"1.0","canonical_sha256":"922fdc3aef4f37ee9f0381debbb9bfb0233274fe2d1864dfd40c6ffe1f22d683","source":{"kind":"arxiv","id":"2505.17006","version":3},"attestation_state":"computed","paper":{"title":"CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Gangshan Wu, Haoyi Zhu, Jiange Yang, Kaijing Ma, Limin Wang, Mingyu Liu, Tong He, Yansong Shi, Yating Wang","submitted_at":"2025-05-22T17:58:27Z","abstract_excerpt":"Unsupervised learning of latent motion from Internet videos is crucial for robot learning. Existing discrete methods generally mitigate the shortcut learning caused by extracting excessive static backgrounds through vector quantization with a small codebook size. However, they suffer from information loss and struggle to capture more complex and fine-grained dynamics. Moreover, there is an inherent gap between the distribution of discrete latent motion and continuous robot action, which hinders the joint learning of a unified policy. We propose CoMo, which aims to learn more precise continuous"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2505.17006","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-05-22T17:58:27Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"3fa01807e356a21f018029f8d278db270bd794e498b157ba2c3602f848d3c606","abstract_canon_sha256":"ff7b8f0e74da7fc0c5d5de72ba70e47a5b177cc77f186fcb161f842f30486364"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:45.902438Z","signature_b64":"yiDAw1WF5TVeyHi1hwprbolUjW6m1YByYW7YeKsXKbqT+m4uN38AAAz47RhFh6xPpqFAn8N/HhOoP5xyRwjNDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"922fdc3aef4f37ee9f0381debbb9bfb0233274fe2d1864dfd40c6ffe1f22d683","last_reissued_at":"2026-06-19T16:12:45.901940Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:45.901940Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Gangshan Wu, Haoyi Zhu, Jiange Yang, Kaijing Ma, Limin Wang, Mingyu Liu, Tong He, Yansong Shi, Yating Wang","submitted_at":"2025-05-22T17:58:27Z","abstract_excerpt":"Unsupervised learning of latent motion from Internet videos is crucial for robot learning. Existing discrete methods generally mitigate the shortcut learning caused by extracting excessive static backgrounds through vector quantization with a small codebook size. However, they suffer from information loss and struggle to capture more complex and fine-grained dynamics. Moreover, there is an inherent gap between the distribution of discrete latent motion and continuous robot action, which hinders the joint learning of a unified policy. We propose CoMo, which aims to learn more precise continuous"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.17006","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2505.17006/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2505.17006","created_at":"2026-06-19T16:12:45.901996+00:00"},{"alias_kind":"arxiv_version","alias_value":"2505.17006v3","created_at":"2026-06-19T16:12:45.901996+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.17006","created_at":"2026-06-19T16:12:45.901996+00:00"},{"alias_kind":"pith_short_12","alias_value":"SIX5YOXPJ436","created_at":"2026-06-19T16:12:45.901996+00:00"},{"alias_kind":"pith_short_16","alias_value":"SIX5YOXPJ4365HYD","created_at":"2026-06-19T16:12:45.901996+00:00"},{"alias_kind":"pith_short_8","alias_value":"SIX5YOXP","created_at":"2026-06-19T16:12:45.901996+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":10,"internal_anchor_count":10,"sample":[{"citing_arxiv_id":"2601.23087","citing_title":"CoLA-Flow Policy: Temporally Coherent Imitation Learning via Continuous Latent Action Flow Matching for Robotic Manipulation","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2602.06949","citing_title":"DreamDojo: A Generalist Robot World Model from Large-Scale Human Videos","ref_index":107,"is_internal_anchor":true},{"citing_arxiv_id":"2601.23087","citing_title":"CoLA-Flow Policy: Temporally Coherent Imitation Learning via Continuous Latent Action Flow Matching for Robotic Manipulation","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2507.23682","citing_title":"villa-X: Enhancing Latent Action Modeling in Vision-Language-Action Models","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13403","citing_title":"RotVLA: Rotational Latent Action for Vision-Language-Action Model","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2604.01907","citing_title":"Lifting Unlabeled Internet-level Data for 3D Scene Understanding","ref_index":113,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12167","citing_title":"From Imagined Futures to Executable Actions: Mixture of Latent Actions for Robot Manipulation","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2512.13030","citing_title":"Motus: A Unified Latent Action World Model","ref_index":50,"is_internal_anchor":true},{"citing_arxiv_id":"2604.22615","citing_title":"GazeVLA: Learning Human Intention for Robotic Manipulation","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00078","citing_title":"Being-H0.7: A Latent World-Action Model from Egocentric Videos","ref_index":83,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA","json":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA.json","graph_json":"https://pith.science/api/pith-number/SIX5YOXPJ4365HYDQHPLXON7WA/graph.json","events_json":"https://pith.science/api/pith-number/SIX5YOXPJ4365HYDQHPLXON7WA/events.json","paper":"https://pith.science/paper/SIX5YOXP"},"agent_actions":{"view_html":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA","download_json":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA.json","view_paper":"https://pith.science/paper/SIX5YOXP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2505.17006&json=true","fetch_graph":"https://pith.science/api/pith-number/SIX5YOXPJ4365HYDQHPLXON7WA/graph.json","fetch_events":"https://pith.science/api/pith-number/SIX5YOXPJ4365HYDQHPLXON7WA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA/action/storage_attestation","attest_author":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA/action/author_attestation","sign_citation":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA/action/citation_signature","submit_replication":"https://pith.science/pith/SIX5YOXPJ4365HYDQHPLXON7WA/action/replication_record"}},"created_at":"2026-06-19T16:12:45.901996+00:00","updated_at":"2026-06-19T16:12:45.901996+00:00"}