{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:P4KL45MZHTYCWF6VQIQW3PQDWV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4db2bef8e97b5fbee9ad57a0aa57ecbe04b66fad10f3907cac3add794caeb3b9","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2024-10-29T17:53:56Z","title_canon_sha256":"ae2cf11612e74b8675dfee265a59fcc34206c63f3a805027932443606d76fa4d"},"schema_version":"1.0","source":{"id":"2410.22313","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2410.22313","created_at":"2026-05-17T23:38:51Z"},{"alias_kind":"arxiv_version","alias_value":"2410.22313v1","created_at":"2026-05-17T23:38:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.22313","created_at":"2026-05-17T23:38:51Z"},{"alias_kind":"pith_short_12","alias_value":"P4KL45MZHTYC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"P4KL45MZHTYCWF6V","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"P4KL45MZ","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:6fff578496f540d4c9e9256e805ece96d83b7d12bf119c9968ae944852ce8415","target":"graph","created_at":"2026-05-17T23:38:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Senna achieves state-of-the-art planning performance. Notably, with pre-training on a large-scale dataset DriveX and fine-tuning on nuScenes, Senna significantly reduces average planning error by 27.12% and collision rate by 33.33% over model without pre-training."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That natural-language planning outputs from the LVLM can be translated into low-level trajectories by the E2E model without introducing critical errors or losing necessary detail in complex or rare scenarios."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Senna decouples language-based high-level planning from an LVLM with low-level trajectory prediction from an E2E model, reporting 27% lower planning error and 33% lower collisions after pre-training on DriveX and fine-tuning on nuScenes."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Senna uses a large vision-language model for natural language driving plans that an end-to-end model converts into precise trajectories."}],"snapshot_sha256":"44eca63ca804d8e916c3308e194805b8be6e1dce2a17651cae3fcbe4b027deb6"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"575564e6eedc3fe4bcad57cafbd57e501dbd67687fe6f5a55ae30f27d6be644a"},"paper":{"abstract_excerpt":"End-to-end autonomous driving demonstrates strong planning capabilities with large-scale data but still struggles in complex, rare scenarios due to limited commonsense. In contrast, Large Vision-Language Models (LVLMs) excel in scene understanding and reasoning. The path forward lies in merging the strengths of both approaches. Previous methods using LVLMs to predict trajectories or control signals yield suboptimal results, as LVLMs are not well-suited for precise numerical predictions. This paper presents Senna, an autonomous driving system combining an LVLM (Senna-VLM) with an end-to-end mod","authors_text":"Bencheng Liao, Bo Jiang, Chang Huang, Qian Zhang, Shaoyu Chen, Wei Yin, Wenyu Liu, Xinggang Wang, Xingyu Zhang","cross_cats":["cs.RO"],"headline":"Senna uses a large vision-language model for natural language driving plans that an end-to-end model converts into precise trajectories.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2024-10-29T17:53:56Z","title":"Senna: Bridging Large Vision-Language Models and End-to-End Autonomous Driving"},"references":{"count":73,"internal_anchors":12,"resolved_work":73,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Detr3d: 3d object detection from multi-view images via 3d-to-2d queries,","work_id":"0b78527b-7dcb-4a11-a7ff-861bbafb5d54","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Y . Hu, J. Yang, L. Chen, K. Li, C. Sima, X. Zhu, S. Chai, S. Du, T. Lin, W. Wang et al., “Planning-oriented autonomous driving,” in CVPR, 2023","work_id":"2c9e1c5b-2400-4ab1-a5ea-9c0fbb17099a","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Vad: Vectorized scene representation for efficient autonomous driving,","work_id":"732a1c3d-0c3a-465c-8c7c-46ff24852f6e","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Lift, splat, shoot: Encoding images from arbitrary camera rigs by implicitly unprojecting to 3d,","work_id":"9e1067db-f398-40c3-9bd3-99f858c0e73d","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"arXiv preprint arXiv:2203.17270 (2022)","work_id":"107ceb33-47b0-4f30-bab3-36a44b3c77e3","year":2022}],"snapshot_sha256":"4c728b57d6b142bf5799ff1c26c99990dd1cf7063faa3a4466312ef60cc992db"},"source":{"id":"2410.22313","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T15:19:50.878304Z","id":"7946fc83-b532-4654-b34d-c33174394eaf","model_set":{"reader":"grok-4.3"},"one_line_summary":"Senna decouples language-based high-level planning from an LVLM with low-level trajectory prediction from an E2E model, reporting 27% lower planning error and 33% lower collisions after pre-training on DriveX and fine-tuning on nuScenes.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Senna uses a large vision-language model for natural language driving plans that an end-to-end model converts into precise trajectories.","strongest_claim":"Senna achieves state-of-the-art planning performance. Notably, with pre-training on a large-scale dataset DriveX and fine-tuning on nuScenes, Senna significantly reduces average planning error by 27.12% and collision rate by 33.33% over model without pre-training.","weakest_assumption":"That natural-language planning outputs from the LVLM can be translated into low-level trajectories by the E2E model without introducing critical errors or losing necessary detail in complex or rare scenarios."}},"verdict_id":"7946fc83-b532-4654-b34d-c33174394eaf"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cfffdc69cb230c48aca5335b104610b0648d2707fd8f037865f82f916464854f","target":"record","created_at":"2026-05-17T23:38:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4db2bef8e97b5fbee9ad57a0aa57ecbe04b66fad10f3907cac3add794caeb3b9","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2024-10-29T17:53:56Z","title_canon_sha256":"ae2cf11612e74b8675dfee265a59fcc34206c63f3a805027932443606d76fa4d"},"schema_version":"1.0","source":{"id":"2410.22313","kind":"arxiv","version":1}},"canonical_sha256":"7f14be75993cf02b17d582216dbe03b5642e447c32af17337aa20574e8bcd085","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7f14be75993cf02b17d582216dbe03b5642e447c32af17337aa20574e8bcd085","first_computed_at":"2026-05-17T23:38:51.101947Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:51.101947Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"X5hm6B5rq/3sTHurfb4fUnY0n0bdFbqrc8GZp6XRzVP52dtr+YWHw79yc4nrmgB5RSfDgr3iCkh51HfDb3/7AQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:51.102398Z","signed_message":"canonical_sha256_bytes"},"source_id":"2410.22313","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cfffdc69cb230c48aca5335b104610b0648d2707fd8f037865f82f916464854f","sha256:6fff578496f540d4c9e9256e805ece96d83b7d12bf119c9968ae944852ce8415"],"state_sha256":"34e10abac1f7cfafb8d14590e493aadd8e34336451a1496e1c99ccd9cab37861"}