{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:C73Z2YA6PP5FWNJSAQQFPH3U4R","short_pith_number":"pith:C73Z2YA6","schema_version":"1.0","canonical_sha256":"17f79d601e7bfa5b35320420579f74e473ab5b2a297ec653c7efb1ed2df8b374","source":{"kind":"arxiv","id":"2605.24642","version":1},"attestation_state":"computed","paper":{"title":"Understanding the Impact of Geometric Foundation Models on Vision-Language-Action Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Cheng-Hao Kuo, Luca Carlone, Martin Labrie, Muyuan Lin, Roberto Martin-Martin, Shreekant Gayaka, Yurou Yang","submitted_at":"2026-05-23T16:18:41Z","abstract_excerpt":"Recent work explores new opportunities at the intersection of vision-language-action models (VLAs) and geometric foundation models (GFMs) for 3D reconstruction, such as VGGT. While the resulting geometric VLAs often show improved performance, it remains unclear (i) if modern VLAs already have sufficient geometric understanding to start with, (ii) what is the best architecture to inject geometric understanding into a VLA, and (iii) what is the effect of other design choices that affect geometric VLAs. In this paper we provide a rigorous experimental analysis to shed light on these questions, fo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.24642","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-23T16:18:41Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"27cc7cae9ef92ac6c0d353004262bf73fe1c071d9637594e477c281d43636267","abstract_canon_sha256":"74f9ea7cc29026dd68967dd08b9ff3dab3f76bf915e3c6517cc9c7626e6336d4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:50.794960Z","signature_b64":"5U6Dv1DFnwodUnlFQBdrulCRqkRQ7PS8cR+NyQi7U5xbTboqCTalrAG7ytM2DyLPcSt4c0jAGE+OmM9As7ZsCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"17f79d601e7bfa5b35320420579f74e473ab5b2a297ec653c7efb1ed2df8b374","last_reissued_at":"2026-05-26T01:03:50.794121Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:50.794121Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Understanding the Impact of Geometric Foundation Models on Vision-Language-Action Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Cheng-Hao Kuo, Luca Carlone, Martin Labrie, Muyuan Lin, Roberto Martin-Martin, Shreekant Gayaka, Yurou Yang","submitted_at":"2026-05-23T16:18:41Z","abstract_excerpt":"Recent work explores new opportunities at the intersection of vision-language-action models (VLAs) and geometric foundation models (GFMs) for 3D reconstruction, such as VGGT. While the resulting geometric VLAs often show improved performance, it remains unclear (i) if modern VLAs already have sufficient geometric understanding to start with, (ii) what is the best architecture to inject geometric understanding into a VLA, and (iii) what is the effect of other design choices that affect geometric VLAs. In this paper we provide a rigorous experimental analysis to shed light on these questions, fo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24642","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.24642/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.24642","created_at":"2026-05-26T01:03:50.794252+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.24642v1","created_at":"2026-05-26T01:03:50.794252+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24642","created_at":"2026-05-26T01:03:50.794252+00:00"},{"alias_kind":"pith_short_12","alias_value":"C73Z2YA6PP5F","created_at":"2026-05-26T01:03:50.794252+00:00"},{"alias_kind":"pith_short_16","alias_value":"C73Z2YA6PP5FWNJS","created_at":"2026-05-26T01:03:50.794252+00:00"},{"alias_kind":"pith_short_8","alias_value":"C73Z2YA6","created_at":"2026-05-26T01:03:50.794252+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R","json":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R.json","graph_json":"https://pith.science/api/pith-number/C73Z2YA6PP5FWNJSAQQFPH3U4R/graph.json","events_json":"https://pith.science/api/pith-number/C73Z2YA6PP5FWNJSAQQFPH3U4R/events.json","paper":"https://pith.science/paper/C73Z2YA6"},"agent_actions":{"view_html":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R","download_json":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R.json","view_paper":"https://pith.science/paper/C73Z2YA6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.24642&json=true","fetch_graph":"https://pith.science/api/pith-number/C73Z2YA6PP5FWNJSAQQFPH3U4R/graph.json","fetch_events":"https://pith.science/api/pith-number/C73Z2YA6PP5FWNJSAQQFPH3U4R/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R/action/timestamp_anchor","attest_storage":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R/action/storage_attestation","attest_author":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R/action/author_attestation","sign_citation":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R/action/citation_signature","submit_replication":"https://pith.science/pith/C73Z2YA6PP5FWNJSAQQFPH3U4R/action/replication_record"}},"created_at":"2026-05-26T01:03:50.794252+00:00","updated_at":"2026-05-26T01:03:50.794252+00:00"}