{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:7UNFV772FNRC5AX63ECEBJJNLW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b0058c9cb08bfe16717aa0f2996ad0907bf79985020d3514a734f2490e937e93","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-13T11:58:02Z","title_canon_sha256":"156756c0e83713819ca534a0933f03d1d22d311eef5d9fb16e469ff32a419c15"},"schema_version":"1.0","source":{"id":"2605.13403","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13403","created_at":"2026-05-18T02:44:47Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13403v1","created_at":"2026-05-18T02:44:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13403","created_at":"2026-05-18T02:44:47Z"},{"alias_kind":"pith_short_12","alias_value":"7UNFV772FNRC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"7UNFV772FNRC5AX6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"7UNFV772","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:eaedebcd7583e6c73b43cccb99834cb353527beb69db40cd0f8d40474fd8b7aa","target":"graph","created_at":"2026-05-18T02:44:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"With only 1.7B parameters and 1700+ hours of pretraining data, RotVLA achieves 98.2% on LIBERO and 89.6% / 88.5% on RoboTwin2.0 under clean and randomized settings, respectively. It also demonstrates strong real-world performance on manipulation tasks, consistently outperforming existing VLA models."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That representing latent actions as elements of SO(n) together with a triplet-frame objective inherently supplies continuity, compositionality, and physically meaningful structure while preventing trivial frame-reconstruction solutions."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"RotVLA models latent actions as continuous SO(n) rotations with triplet-frame supervision and flow-matching to reach 98.2% success on LIBERO and 89.6%/88.5% on RoboTwin2.0 using a 1.7B-parameter model."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"RotVLA replaces discrete action codes with continuous rotations in SO(n) for vision-language-action models."}],"snapshot_sha256":"a9c8e1113c85f322833bd3fa93239dafd3ee05470983c42e33923972ba7d27b0"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"61af6b48e18519772de59e4d3a20be8a02b289c706a90aad0c87e7dc08b7a177"},"paper":{"abstract_excerpt":"Latent Action Models (LAMs) have emerged as an effective paradigm for handling heterogeneous datasets during Vision-Language-Action (VLA) model pretraining, offering a unified action space across embodiments. However, existing LAMs often rely on discrete quantization encode and decode pipelines, which can lead to trivial frame reconstruction behavior, limited representational capacity, and a lack of physically meaningful structure. We introduce RotVLA, a VLA framework built on a continuous rotational latent action representation. Latent actions are modeled as elements of SO(n), providing conti","authors_text":"Hangjun Ye, Jiahuan Zhou, Peiyan Li, Qiwei Li, Quanyun Zhou, Xicheng Gong, Xinghang Li, Yadong Mu","cross_cats":["cs.CV"],"headline":"RotVLA replaces discrete action codes with continuous rotations in SO(n) for vision-language-action models.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-13T11:58:02Z","title":"RotVLA: Rotational Latent Action for Vision-Language-Action Model"},"references":{"count":86,"internal_anchors":28,"resolved_work":86,"sample":[{"cited_arxiv_id":"2405.14093","doi":"","is_internal_anchor":true,"ref_index":1,"title":"A Survey on Vision-Language-Action Models for Embodied AI","work_id":"9492fb3d-d667-4892-81bb-b2878f12ff0c","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Roumelio- tis, and Manoj Karkee","work_id":"5f0bf2cc-1901-4ad0-940b-1e742cc6d7e7","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Visual instruction tuning.Advances in neural information processing systems, 36:34892–34916","work_id":"115823a2-8918-4227-8872-3d0a36ff07a9","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Internvl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks","work_id":"321b2bd4-950a-44f0-ab50-e70251e75187","year":2024},{"cited_arxiv_id":"2502.13923","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Qwen2.5-VL Technical Report","work_id":"69dffacb-bfe8-442d-be86-48624c60426f","year":2025}],"snapshot_sha256":"114c3a6b5eda750fac0cfaa8fbe9b84492ddf74d37e389105a4d0f5a207e0207"},"source":{"id":"2605.13403","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T17:44:25.288826Z","id":"7f51c42f-7b3a-4437-a206-a8768d87e6b6","model_set":{"reader":"grok-4.3"},"one_line_summary":"RotVLA models latent actions as continuous SO(n) rotations with triplet-frame supervision and flow-matching to reach 98.2% success on LIBERO and 89.6%/88.5% on RoboTwin2.0 using a 1.7B-parameter model.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"RotVLA replaces discrete action codes with continuous rotations in SO(n) for vision-language-action models.","strongest_claim":"With only 1.7B parameters and 1700+ hours of pretraining data, RotVLA achieves 98.2% on LIBERO and 89.6% / 88.5% on RoboTwin2.0 under clean and randomized settings, respectively. It also demonstrates strong real-world performance on manipulation tasks, consistently outperforming existing VLA models.","weakest_assumption":"That representing latent actions as elements of SO(n) together with a triplet-frame objective inherently supplies continuity, compositionality, and physically meaningful structure while preventing trivial frame-reconstruction solutions."}},"verdict_id":"7f51c42f-7b3a-4437-a206-a8768d87e6b6"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:304a5c61aa87659164c19199409ac71aeb91b3ccebb2867ee4d2318e04bd36ea","target":"record","created_at":"2026-05-18T02:44:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b0058c9cb08bfe16717aa0f2996ad0907bf79985020d3514a734f2490e937e93","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-05-13T11:58:02Z","title_canon_sha256":"156756c0e83713819ca534a0933f03d1d22d311eef5d9fb16e469ff32a419c15"},"schema_version":"1.0","source":{"id":"2605.13403","kind":"arxiv","version":1}},"canonical_sha256":"fd1a5afffa2b622e82fed90440a52d5da9bbc0d853c481f1fed1519437d83071","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fd1a5afffa2b622e82fed90440a52d5da9bbc0d853c481f1fed1519437d83071","first_computed_at":"2026-05-18T02:44:47.562788Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:47.562788Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+YB4vFlj8LSr6rvLu7tU2lGA7JinpKWjW+9m0N1uhq7/BqjFPq1HYIFE7X0mszsTQMZ8qHXrvkVYO4ZCCS3kDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:47.563245Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13403","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:304a5c61aa87659164c19199409ac71aeb91b3ccebb2867ee4d2318e04bd36ea","sha256:eaedebcd7583e6c73b43cccb99834cb353527beb69db40cd0f8d40474fd8b7aa"],"state_sha256":"0e793c9e5ee1dab85556591aa1cf54acaf49fa77b1887e1da09c5251d42fec1b"}