{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CFUQQNK3LVTZXEZAP56EF4SMLW","short_pith_number":"pith:CFUQQNK3","schema_version":"1.0","canonical_sha256":"116908355b5d679b93207f7c42f24c5d8e5b51ba8d46fbc60d85145a21402f06","source":{"kind":"arxiv","id":"2605.22036","version":1},"attestation_state":"computed","paper":{"title":"GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Jiahao Yang, Shuqiang Jiang, Xiangyang Li, Xing Zhu, Yinghao Xu, Yujun Shen, Zihan Wang","submitted_at":"2026-05-21T06:20:17Z","abstract_excerpt":"Despite significant progress in Vision-Language Navigation (VLN), existing approaches still rely on dense RGB videos that produce excessive patch tokens and lack explicit spatial structure, resulting in substantial computational overhead and limited spatial reasoning. To address these issues, we introduce the Geometry-Aware BEV (GA-BEV) - a compact, 3D-grounded feature representation that integrates both explicit and implicit geometric cues into multimodal large language model (MLLM) - based navigation systems. We construct BEV spatial maps from RGB-D inputs by projecting visual features into "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.22036","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T06:20:17Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"b17f78890ad4d8dc5f8b8010fa99680027155fa889a38c9fc6e77535078925b1","abstract_canon_sha256":"75790f576f8f4a34282ae4dde3f2d5c24d0fb206d63694819bcef488117e2a01"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:21.705304Z","signature_b64":"BfmILuGkhJOxULs0QoyBS2b3+U/2DlJYdl2EUme6FjuT1fmeciMKWt3JPeOJtNLFA10a1CK65fZZ6P3t2B6UDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"116908355b5d679b93207f7c42f24c5d8e5b51ba8d46fbc60d85145a21402f06","last_reissued_at":"2026-05-22T01:04:21.704505Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:21.704505Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Jiahao Yang, Shuqiang Jiang, Xiangyang Li, Xing Zhu, Yinghao Xu, Yujun Shen, Zihan Wang","submitted_at":"2026-05-21T06:20:17Z","abstract_excerpt":"Despite significant progress in Vision-Language Navigation (VLN), existing approaches still rely on dense RGB videos that produce excessive patch tokens and lack explicit spatial structure, resulting in substantial computational overhead and limited spatial reasoning. To address these issues, we introduce the Geometry-Aware BEV (GA-BEV) - a compact, 3D-grounded feature representation that integrates both explicit and implicit geometric cues into multimodal large language model (MLLM) - based navigation systems. We construct BEV spatial maps from RGB-D inputs by projecting visual features into "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22036","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.22036/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.22036","created_at":"2026-05-22T01:04:21.704634+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.22036v1","created_at":"2026-05-22T01:04:21.704634+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22036","created_at":"2026-05-22T01:04:21.704634+00:00"},{"alias_kind":"pith_short_12","alias_value":"CFUQQNK3LVTZ","created_at":"2026-05-22T01:04:21.704634+00:00"},{"alias_kind":"pith_short_16","alias_value":"CFUQQNK3LVTZXEZA","created_at":"2026-05-22T01:04:21.704634+00:00"},{"alias_kind":"pith_short_8","alias_value":"CFUQQNK3","created_at":"2026-05-22T01:04:21.704634+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW","json":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW.json","graph_json":"https://pith.science/api/pith-number/CFUQQNK3LVTZXEZAP56EF4SMLW/graph.json","events_json":"https://pith.science/api/pith-number/CFUQQNK3LVTZXEZAP56EF4SMLW/events.json","paper":"https://pith.science/paper/CFUQQNK3"},"agent_actions":{"view_html":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW","download_json":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW.json","view_paper":"https://pith.science/paper/CFUQQNK3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.22036&json=true","fetch_graph":"https://pith.science/api/pith-number/CFUQQNK3LVTZXEZAP56EF4SMLW/graph.json","fetch_events":"https://pith.science/api/pith-number/CFUQQNK3LVTZXEZAP56EF4SMLW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW/action/storage_attestation","attest_author":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW/action/author_attestation","sign_citation":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW/action/citation_signature","submit_replication":"https://pith.science/pith/CFUQQNK3LVTZXEZAP56EF4SMLW/action/replication_record"}},"created_at":"2026-05-22T01:04:21.704634+00:00","updated_at":"2026-05-22T01:04:21.704634+00:00"}