{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:HEPKW5KCFGKVMBQITEM3MRKBOK","short_pith_number":"pith:HEPKW5KC","schema_version":"1.0","canonical_sha256":"391eab754229955606089919b64541729c2d5c00e81ce06f8d7fc643a896e6d9","source":{"kind":"arxiv","id":"2602.18532","version":2},"attestation_state":"computed","paper":{"title":"VLANeXt: Recipes for Building Strong VLA Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.CV","authors_text":"Bin Fan, Chen Change Loy, Jian-Jian Jiang, Kang Liao, Runze Yang, Wei-Shi Zheng, Xiao-Ming Wu, Yihang Luo, Zhonghua Wu","submitted_at":"2026-02-20T09:26:17Z","abstract_excerpt":"Following the rise of large foundation models, Vision-Language-Action models (VLAs) emerged, leveraging strong visual and language understanding from Vision-Language Models for general-purpose policy learning. Yet, the current VLA landscape remains fragmented and exploratory. Although many groups have proposed their own VLA models, inconsistencies in training protocols and evaluation settings make it difficult to identify which design choices truly matter. To bring structure to this evolving space, we reexamine the VLA design space under a unified framework and evaluation setup. Starting from "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.18532","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-02-20T09:26:17Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"d6315d1a6a35ba27a24e62625fb9d98b9922cfa40be6aadfd0dcb9004aa47375","abstract_canon_sha256":"340b990ebc91a59628cfde680a9e1979464e737bfceba136f6d58ea83330e145"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:16.554888Z","signature_b64":"peI2xhC4ZP/cocKY2Asjf3tzaU4jWd36EV01VygZAZVrbqgDu3VPmy9ZUVKG9fxukeOng6cIZLZbiOTXTqdGAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"391eab754229955606089919b64541729c2d5c00e81ce06f8d7fc643a896e6d9","last_reissued_at":"2026-05-21T01:05:16.554134Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:16.554134Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"VLANeXt: Recipes for Building Strong VLA Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.CV","authors_text":"Bin Fan, Chen Change Loy, Jian-Jian Jiang, Kang Liao, Runze Yang, Wei-Shi Zheng, Xiao-Ming Wu, Yihang Luo, Zhonghua Wu","submitted_at":"2026-02-20T09:26:17Z","abstract_excerpt":"Following the rise of large foundation models, Vision-Language-Action models (VLAs) emerged, leveraging strong visual and language understanding from Vision-Language Models for general-purpose policy learning. Yet, the current VLA landscape remains fragmented and exploratory. Although many groups have proposed their own VLA models, inconsistencies in training protocols and evaluation settings make it difficult to identify which design choices truly matter. To bring structure to this evolving space, we reexamine the VLA design space under a unified framework and evaluation setup. Starting from "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.18532","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.18532/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.18532","created_at":"2026-05-21T01:05:16.554248+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.18532v2","created_at":"2026-05-21T01:05:16.554248+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.18532","created_at":"2026-05-21T01:05:16.554248+00:00"},{"alias_kind":"pith_short_12","alias_value":"HEPKW5KCFGKV","created_at":"2026-05-21T01:05:16.554248+00:00"},{"alias_kind":"pith_short_16","alias_value":"HEPKW5KCFGKVMBQI","created_at":"2026-05-21T01:05:16.554248+00:00"},{"alias_kind":"pith_short_8","alias_value":"HEPKW5KC","created_at":"2026-05-21T01:05:16.554248+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2605.19282","citing_title":"Rethinking Muon Beyond Pretraining: Spectral Failures and High-Pass Remedies for VLA and RLVR","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06175","citing_title":"VLA-GSE: Boosting Parameter-Efficient Fine-Tuning in VLA with Generalized and Specialized Experts","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06175","citing_title":"VLA-GSE: Boosting Parameter-Efficient Fine-Tuning in VLA with Generalized and Specialized Experts","ref_index":29,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK","json":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK.json","graph_json":"https://pith.science/api/pith-number/HEPKW5KCFGKVMBQITEM3MRKBOK/graph.json","events_json":"https://pith.science/api/pith-number/HEPKW5KCFGKVMBQITEM3MRKBOK/events.json","paper":"https://pith.science/paper/HEPKW5KC"},"agent_actions":{"view_html":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK","download_json":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK.json","view_paper":"https://pith.science/paper/HEPKW5KC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.18532&json=true","fetch_graph":"https://pith.science/api/pith-number/HEPKW5KCFGKVMBQITEM3MRKBOK/graph.json","fetch_events":"https://pith.science/api/pith-number/HEPKW5KCFGKVMBQITEM3MRKBOK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK/action/storage_attestation","attest_author":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK/action/author_attestation","sign_citation":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK/action/citation_signature","submit_replication":"https://pith.science/pith/HEPKW5KCFGKVMBQITEM3MRKBOK/action/replication_record"}},"created_at":"2026-05-21T01:05:16.554248+00:00","updated_at":"2026-05-21T01:05:16.554248+00:00"}