{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:VGLOYHWFMPJ2DP6A75VL3BMZD7","short_pith_number":"pith:VGLOYHWF","schema_version":"1.0","canonical_sha256":"a996ec1ec563d3a1bfc0ff6abd85991fd4ec63f8c5a055509822a2c9385f0270","source":{"kind":"arxiv","id":"2606.00095","version":1},"attestation_state":"computed","paper":{"title":"Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.RO"],"primary_cat":"cs.CV","authors_text":"Jingyu Gong, Kailing Li, Liang He, Lijin Yang, Tianwen Qian, Xiaoling Wang, Yuqian Fu","submitted_at":"2026-05-25T08:53:21Z","abstract_excerpt":"Vision-Language Navigation (VLN) enables embodied agents to reach target locations in unseen environments by following language instructions. Despite recent progress with vision-language models (VLMs), a critical semantic-geometric gap remains: while VLMs excel at language and 2D visual understanding, they struggle with 3D spatial reasoning and fail to capture the causal dynamics between actions and spatial transitions, resulting in unreliable navigation, particularly in zero-shot settings. To bridge this gap, we propose a Hierarchical Semantic-Geometric Map (HSGM) that transforms 3D geometric"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.00095","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-25T08:53:21Z","cross_cats_sorted":["cs.AI","cs.CL","cs.RO"],"title_canon_sha256":"45128861a283add2ecb0b5c92313ef90ce2897837bd5dc7c56663a66c390da7c","abstract_canon_sha256":"6740a7b136b8f9d6bcac9ea637a8e740e3c89b53f738fe2abfffdcc31ffb747b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T00:03:14.853084Z","signature_b64":"gMZ+t3a7T9ip6SS7GsyytKDnIhPxoZKHK7T+vmWKMRCT5cgG9IhfdPLWh7QFmArQmHrHCsqGFbGAk/aTSTh8CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a996ec1ec563d3a1bfc0ff6abd85991fd4ec63f8c5a055509822a2c9385f0270","last_reissued_at":"2026-06-02T00:03:14.852603Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T00:03:14.852603Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.RO"],"primary_cat":"cs.CV","authors_text":"Jingyu Gong, Kailing Li, Liang He, Lijin Yang, Tianwen Qian, Xiaoling Wang, Yuqian Fu","submitted_at":"2026-05-25T08:53:21Z","abstract_excerpt":"Vision-Language Navigation (VLN) enables embodied agents to reach target locations in unseen environments by following language instructions. Despite recent progress with vision-language models (VLMs), a critical semantic-geometric gap remains: while VLMs excel at language and 2D visual understanding, they struggle with 3D spatial reasoning and fail to capture the causal dynamics between actions and spatial transitions, resulting in unreliable navigation, particularly in zero-shot settings. To bridge this gap, we propose a Hierarchical Semantic-Geometric Map (HSGM) that transforms 3D geometric"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.00095","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.00095/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.00095","created_at":"2026-06-02T00:03:14.852668+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.00095v1","created_at":"2026-06-02T00:03:14.852668+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.00095","created_at":"2026-06-02T00:03:14.852668+00:00"},{"alias_kind":"pith_short_12","alias_value":"VGLOYHWFMPJ2","created_at":"2026-06-02T00:03:14.852668+00:00"},{"alias_kind":"pith_short_16","alias_value":"VGLOYHWFMPJ2DP6A","created_at":"2026-06-02T00:03:14.852668+00:00"},{"alias_kind":"pith_short_8","alias_value":"VGLOYHWF","created_at":"2026-06-02T00:03:14.852668+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7","json":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7.json","graph_json":"https://pith.science/api/pith-number/VGLOYHWFMPJ2DP6A75VL3BMZD7/graph.json","events_json":"https://pith.science/api/pith-number/VGLOYHWFMPJ2DP6A75VL3BMZD7/events.json","paper":"https://pith.science/paper/VGLOYHWF"},"agent_actions":{"view_html":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7","download_json":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7.json","view_paper":"https://pith.science/paper/VGLOYHWF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.00095&json=true","fetch_graph":"https://pith.science/api/pith-number/VGLOYHWFMPJ2DP6A75VL3BMZD7/graph.json","fetch_events":"https://pith.science/api/pith-number/VGLOYHWFMPJ2DP6A75VL3BMZD7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7/action/storage_attestation","attest_author":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7/action/author_attestation","sign_citation":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7/action/citation_signature","submit_replication":"https://pith.science/pith/VGLOYHWFMPJ2DP6A75VL3BMZD7/action/replication_record"}},"created_at":"2026-06-02T00:03:14.852668+00:00","updated_at":"2026-06-02T00:03:14.852668+00:00"}