{"total":35,"items":[{"citing_arxiv_id":"2605.29705","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices","primary_cat":"cs.AI","submitted_at":"2026-05-28T10:04:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"BitTP applies weight-only 1.58-bit quantization to LLM trajectory predictors, claiming improved ADE/FDE over BF16 baseline with reduced resource demands on edge devices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.25393","ref_index":38,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Decision-Making with Lightweight Confidence-Aware Language Model for Autonomous Driving","primary_cat":"cs.RO","submitted_at":"2026-05-25T03:38:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Lightweight confidence-aware LM distilled from multi-agent CoT demonstrations achieves SOTA success rates on nuPlan benchmark for AD decision-making with low inference latency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21139","ref_index":1,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Distill to Think, Foresee to Act: Cognitive-Physical Reinforcement Learning for Autonomous Driving","primary_cat":"cs.CV","submitted_at":"2026-05-20T13:14:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CoPhy is a new RL framework that distills VLM cognition into BEV encoders, adds an auto-regressive BEV world model for action-conditioned future prediction, and optimizes policies via GRPO with dual physical-cognitive rewards, claiming SOTA on NAVSIM v1/v2.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19771","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Beyond Imitation: Learning Safe End-to-End Autonomous Driving from Hard Negatives","primary_cat":"cs.RO","submitted_at":"2026-05-19T12:41:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BeyondDrive augments imitation learning with synthesized safety-critical negative trajectories and a repulsive loss to improve safety in autonomous driving, reporting 89.7 PDMS on NAVSIMv1 and generalization to other models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19631","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"HEAT: Heterogeneous End-to-End Autonomous Driving via Trajectory-Guided World Models","primary_cat":"cs.RO","submitted_at":"2026-05-19T10:12:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"HEAT uses a trajectory-driven learning paradigm and a world model predicting future latent features from ego actions to enable a single unified end-to-end autonomous driving model to perform well across heterogeneous domains on nuScenes, NAVSIM, and Waymo benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19033","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning","primary_cat":"cs.RO","submitted_at":"2026-05-18T18:57:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"RLFTSim uses RL fine-tuning on a pre-trained model with a balanced reward to align traffic simulator rollouts to real data distributions and distill goal-conditioned controllability, reporting SOTA realism on the Waymo Open Motion Dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18059","ref_index":67,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Bench2Drive-Robust: Benchmarking Closed-Loop Autonomous Driving under Deployment Perturbations","primary_cat":"cs.RO","submitted_at":"2026-05-18T08:45:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Bench2Drive-Robust is a new closed-loop benchmark that evaluates end-to-end autonomous driving models under deployment perturbations from camera failures, ego-state errors, and compute delays, showing substantial performance degradation beyond image-level tests.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16737","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DriveSafer: End-to-End Autonomous Driving with Safety Guidance","primary_cat":"cs.RO","submitted_at":"2026-05-16T01:21:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DriveSafer reduces catastrophic failures (PDMS=0) by 48% and drivable-area compliance failures by over 65% versus DiffusionDrive on the NAVSIM benchmark by combining training-time safety constraints with inference-time guidance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13646","ref_index":2,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Causality-Aware End-to-End Autonomous Driving via Ego-Centric Joint Scene Modeling","primary_cat":"cs.RO","submitted_at":"2026-05-13T15:06:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CaAD adds ego-centric joint-causal modeling and causality-aware policy alignment to end-to-end driving, reporting Driving Score 87.53 and PDMS 91.1 on Bench2Drive and NAVSIM.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"important directions for future work. 9 References [1] Holger Caesar, Varun Bankiti, Alex H Lang, Sourabh V ora, Venice Erin Liong, Qiang Xu, Anush Krishnan, Yu Pan, Giancarlo Baldan, and Oscar Beijbom. nuscenes: A multimodal dataset for autonomous driving. InProceedings of the IEEE/CVF conference on computer vision and pattern recognition, pages 11621-11631, 2020. [2] Holger Caesar, Juraj Kabzan, Kok Seang Tan, Whye Kit Fong, Eric Wolff, Alex Lang, Luke Fletcher, Oscar Beijbom, and Sammy Omari. nuplan: A closed-loop ml-based planning bench- mark for autonomous vehicles.arXiv preprint arXiv:2106.11810, 2021. [3] Raphael Chekroun, Marin Toromanoff, Sascha Hornauer, and Fabien Moutarde. Gri: General reinforced imitation and its application to vision-based autonomous driving."},{"citing_arxiv_id":"2605.10904","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"MDrive: Benchmarking Closed-Loop Cooperative Driving for End-to-End Multi-agent Systems","primary_cat":"cs.RO","submitted_at":"2026-05-11T17:44:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MDrive benchmark shows multi-agent cooperative driving systems generally outperform single-agent ones in closed-loop settings but perception sharing does not always improve planning and negotiation can harm performance in complex traffic.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"This is related to recent findings [23, 2 Table 1:Comparisons with existing end-to-end open-loop and closed-loop driving benchmarks. Bg. Actors: background actors; Reactive: reactive simulation; Perc. Sharing: perception sharing. Benchmark E2E Closed-loop Bg. Actors Reactive Perc. Sharing Negotiation Scenario Gen. Single-agent Driving Benchmarks nuPlan [25] × ×✓× × × Log-replay NA VSIM [26, 27] ✓×✓× × × Log-replay Bench2Drive [19] ✓ ✓ ✓ ✓ × × Hand-crafted HUGSIM [28] ✓ ✓ ✓ ✓ × × Real-world Data DriveArena [29] ✓ ✓ ✓ ✓ × × Real-world Data Fail2Drive [24] ✓ ✓ ✓ ✓ × × Hand-crafted BridgeSim [16] ✓ ✓ ✓ ✓ × × Real2Sim Multi-agent Driving Benchmarks RiskMM [15] ✓×✓× ✓× Real-world Data V2Xverse [30] ✓ ✓ ✓ ✓ ✓× Hand-crafted"},{"citing_arxiv_id":"2605.10426","ref_index":13,"ref_count":4,"confidence":0.9,"is_internal_anchor":true,"paper_title":"CoWorld-VLA: Thinking in a Multi-Expert World Model for Autonomous Driving","primary_cat":"cs.CV","submitted_at":"2026-05-11T12:01:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CoWorld-VLA extracts semantic, geometric, dynamic, and trajectory expert tokens from multi-source supervision and feeds them into a diffusion-based hierarchical planner, achieving competitive collision avoidance and trajectory accuracy on the NAVSIM v1 benchmark.","context_count":2,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"αe ˆAe,L act =L diff +λ fusion ¯A−A norm 2 2 .(18) At inference time, HMEF starts from Gaussian noise, iteratively denoises the expert trajectories, fuses them withα, and denormalizes the result to obtain the executable ego plan. 4 Experiments 4.1 Experimental settings Datasets.Following previous studies [ 50, 60, 66, 73, 74], we train CoWorld-VLA on NuPlan [13] and NA VSIM v1 [14], evaluating video generation on NuPlan and trajectory planning on NA VSIM v1. NuPlan contains approximately 1,200 hours of real-world driving data from four cities. NA VSIM v1, built upon OpenScene [75], provides 120 hours of 2 Hz multi-view driving data for planning-oriented evaluation under challenging dynamic scenarios, with 1,192 training clips and 136 testing clips."},{"citing_arxiv_id":"2605.10388","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Temporal Sampling Frequency Matters: A Capacity-Aware Study of End-to-End Driving Trajectory Prediction","primary_cat":"cs.CV","submitted_at":"2026-05-11T11:34:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Smaller end-to-end autonomous driving models achieve optimal 3-second trajectory prediction accuracy at lower or intermediate temporal sampling frequencies, whereas larger VLA-style models perform best at the highest frequencies across Waymo, nuScenes, and PAVE datasets.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"The capacity-aware view is supported indirectly by the observed frequency responses and the iteration-matched controls. We do not directly separate selected camera frames into driving-relevant information, redundant visual content, and driving-irrelevant off-manifold noise. The study is also limited to offline ADE/FDE evaluation. Deployment-oriented systems should further validate temporal sampling frequency under closed-loop evaluation [5]. 5 Conclusion Temporal sampling frequency should be treated as an explicit training variable in E2E autonomous- driving trajectory prediction. Across Waymo, nuScenes, and PA VE, smaller E2E models often prefer lower or intermediate temporal sampling frequencies rather than the highest evaluated frequency. Specifically, for 3-second ADE: E2EDriver and BEV-E2EDriver perform best at 8 Hz on Waymo, 6 Hz"},{"citing_arxiv_id":"2605.09701","ref_index":52,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"DriveFuture: Future-Aware Latent World Models for Autonomous Driving","primary_cat":"cs.CV","submitted_at":"2026-05-10T18:45:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DriveFuture achieves SOTA results on NAVSIM by conditioning latent world model states on future predictions to directly inform trajectory planning.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"7 97.3 97.6 98.1 87.3 - 89.3DriveFuture 98.8 99.1 99.6 99.9 86.6 98.4 96.4 98.3 74.8 86.4 89.9 4 Experiments 4.1 Datasets and Evaluation Metrics We evaluate DriveFuture on the publicNAVSIMbenchmark [1], which is built on OpenScene [50] and nuPlan [51] logs for lightweight planning evaluation. We report results onNAVSIM-v1 navtest[ 1], NAVSIM-v2 navtest[ 52], andNAVSIM-v2 navhard[ 52].navtestmeasures general planning per- formance, whilenavhardemphasizes safety-critical and long-tail scenarios. Following the official protocol, we use PDMS forNAVSIM-v1and EPDMS forNAVSIM-v2. EPDMS extends PDMS with additional rule- and comfort-related metrics, including driving-direction compliance, traffic-light compliance, lane keeping, and extended comfort."},{"citing_arxiv_id":"2605.08528","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"SceneFactory: GPU-Accelerated Multi-Agent Driving Simulation with Physics-Based Vehicle Dynamics","primary_cat":"cs.MA","submitted_at":"2026-05-08T22:23:11+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SceneFactory delivers a batched GPU platform for physics-based multi-agent autonomous driving simulation that achieves 127x higher throughput than non-vectorized PhysX while supporting articulated dynamics and road-condition friction.","context_count":1,"top_context_role":"baseline","top_context_polarity":"baseline","context_text":"boundaries, and dataset-derived origin-destination tasks, and runs many such worlds concurrently on one GPU. To support road-condition-aware evaluation, SceneFactory further includes a weather-to- 2 Simulator Multi-agentBatched GPU envs ‡ Sensor SimExpert DataSim-agentsRigid-body physicsWeather→Friction † Routes / Goals CARLA [2]✓ ✓ ✓Waypoints SUMMIT [10]✓(≥400)✓ ✓ ✓- SMARTS [11]✓ ✓Waypoints nuPlan [12]✓ ✓ ✓Waypoints Nocturne [13]✓(≥128)✓ ✓ ✓Goal point MetaDrive [3]✓ ✓ ✓ ✓ ✓- TorchDriveSim [14]✓ ✓- Waymax [4]✓(≥128)✓ ✓ ✓Waypoints GPUDrive [5]✓(≥128)✓ ✓ ✓ ✓Goal point SceneFactory (ours)✓(≥128)✓ ✓ ✓ ✓ ✓ ✓Goal point † Physics-engine friction set from precipitation + surface type; not just manual friction or visual weather. ‡ Single-process GPU-resident batched stepping; CARLA/MetaDrive use multi-process parallelism"},{"citing_arxiv_id":"2605.06987","ref_index":128,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Response Time Enhances Alignment with Heterogeneous Preferences","primary_cat":"cs.LG","submitted_at":"2026-05-07T22:05:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Response times modeled as drift-diffusion processes enable consistent estimation of population-average preferences from heterogeneous anonymous binary choices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04647","ref_index":94,"ref_count":2,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ReflectDrive-2: Reinforcement-Learning-Aligned Self-Editing for Discrete Diffusion Driving","primary_cat":"cs.RO","submitted_at":"2026-05-06T08:52:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ReflectDrive-2 combines masked discrete diffusion with RL-aligned self-editing to generate and refine driving trajectories, reaching 91.0 PDMS on NAVSIM camera-only and 94.8 in best-of-6.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.25329","ref_index":1,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ProDrive: Proactive Planning for Autonomous Driving via Ego-Environment Co-Evolution","primary_cat":"cs.RO","submitted_at":"2026-04-28T07:46:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ProDrive couples a query-centric planner with a BEV world model for end-to-end ego-environment co-evolution, enabling future-outcome assessment that improves safety and efficiency over reactive baselines on NAVSIM v1.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17915","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"OneDrive: Unified Multi-Paradigm Driving with Vision-Language-Action Models","primary_cat":"cs.CV","submitted_at":"2026-04-20T07:50:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"OneDrive unifies heterogeneous decoding in a single VLM transformer decoder for end-to-end driving, achieving 0.28 L2 error and 0.18 collision rate on nuScenes plus 86.8 PDMS on NAVSIM.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"extension [47], which augments nuScenes with QA-style annotations spanning perception, prediction, and planning. For planning in the open-loop setting, we measure trajectory accuracy using the L2 displacement error and assess safety using collision rate. OneDrive 9 NAVSIM[15] is a planning-oriented benchmark built upon OpenScene [13], a redistribution of nuPlan [3]. The dataset is divided into 1,192 training scenes (navtrain)and136evaluationscenes(navtest).ComparedwithnuScenes,NAVSIM focuses more heavily on interactive and safety-critical planning scenarios. Met- rics include average displacement error, collision rate, and the official NAVSIM score PDMS, which jointly reflect safety, rule compliance, and driving efficiency."},{"citing_arxiv_id":"2604.13853","ref_index":28,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Mosaic: An Extensible Framework for Composing Rule-Based and Learned Motion Planners","primary_cat":"cs.RO","submitted_at":"2026-04-15T13:23:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Mosaic integrates rule-based and learned planners via arbitration graphs to set new state-of-the-art scores on nuPlan and interPlan benchmarks while cutting at-fault collisions by 30%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12857","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Artificial Intelligence for Modeling and Simulation of Mixed Automated and Human Traffic","primary_cat":"cs.AI","submitted_at":"2026-04-14T15:09:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"This survey synthesizes AI techniques for mixed autonomy traffic simulation and introduces a taxonomy spanning agent-level behavior models, environment-level methods, and cognitive/physics-informed approaches.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"trajectory generation, driver modeling, and scenario gener- ation). We monitored key venues from both the machine learning and transportation communities. Citation tracking and snow balling was also used from foundational and recent papers to identify additional relevant works. We also tracked major benchmarks and challenges, including the Waymo Open Motion Dataset [23], Argoverse [24], nuPlan [25], and the Waymo Open Sim Agents Challenge (WOSAC) [26]. For cognitive and physics-informed methods, we also explored the human factors and cognitive science literature, including journals focusing on Human Factors and Cognitive Science. For each methodological category, we selected 7-15 representative papers based on five criteria: (1) a foundational"},{"citing_arxiv_id":"2604.10856","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"BridgeSim: Unveiling the OL-CL Gap in End-to-End Autonomous Driving","primary_cat":"cs.RO","submitted_at":"2026-04-12T23:37:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The primary OL-CL gap in end-to-end autonomous driving arises from objective mismatch creating structural inability to model reactive behaviors, which a test-time adaptation method can mitigate.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"3 BridgeSim: A Unified Cross-Simulator Closed-loop Simulation Platform for E2E Driving Policies To empirically investigate the OL-CL gap with quantifiable rigor, we introduce BridgeSim, a cross- simulator platform designed to evaluate OL pretrained policies within high-fidelity CL environ- ments. BridgeSim designs a unified scenario protocol to incorporate diverse map scenarios (e.g., nuPlan [12], WOMD [45], and nuScenes [1]) with heterogeneous traffic modes (e.g., log-replay, IDM [46], and adversarial policies [47]) to stress-test E2E driving policies under a closed-loop simulation environment. Furthermore, BridgeSim offers a flexible deployment setting to simulate open-loop policy with varying execution frequencies and simulation horizons."},{"citing_arxiv_id":"2604.07378","ref_index":37,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Evaluation as Evolution: Transforming Adversarial Diffusion into Closed-Loop Curricula for Autonomous Vehicles","primary_cat":"cs.RO","submitted_at":"2026-04-08T01:34:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"E² uses transport-regularized sparse control on learned reverse-time SDEs with topology-driven selection and Topological Anchoring to generate realistic adversarial scenarios, improving collision discovery by 9.01% on nuScenes and up to 21.43% on nuPlan while enabling closed-loop robustness gains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.01044","ref_index":43,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"A global dataset of continuous urban dashcam driving","primary_cat":"cs.CV","submitted_at":"2026-04-01T15:52:17+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CROWD is a new global dataset of 51,753 continuous urban dashcam segments spanning over 20,000 hours from 238 countries, with manual labels and automated object detections for routine driving analysis.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"nuScenes [17] USA (Boston), Singapore ≈ 5.6k Both 1,000 scenes × 20 s; 6 cameras (front included) + LiDAR/radar; includes night and rain. ONCE [42] Not fully enumerated (200 km2 driving regions) 144 Both 1M LiDAR scenes with 7M camera images; 7 cameras + LiDAR; diverse environments (day, night, sunny, rainy, urban, suburban). OpenDV 2K (OpenDrive- Lab) [43] ≥ 40 countries, ≥ 244 cities 2,059 (1,747 YouTube) Not quantiﬁed W eb mined front view driving videos paired with text; country and city counts are estimates from video titles; camera setup is described as uncalibrated. Oxford Robot- Car [44] UK (Oxford) N/A l Both Repeated route over ∼ 1,000 km; data collected across varied weather and lighting conditions;"},{"citing_arxiv_id":"2603.29908","ref_index":68,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"C-TRAIL: A Commonsense World Framework for Trajectory Planning in Autonomous Driving","primary_cat":"cs.AI","submitted_at":"2026-03-31T15:53:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"C-TRAIL combines LLM commonsense with a dual-trust mechanism and Dirichlet-weighted Monte Carlo Tree Search to improve trajectory planning accuracy and safety in autonomous driving.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"empirically detects LLM errors and degrades gracefully, formal safety verification for LLM-integrated planners remains an open challenge. Future work includes extending C-TRAIL to multi-agent interaction-aware planning, incorporating mul- timodal inputs for richer commonsense grounding, and vali- dating on high-fidelity simulators such as CARLA [ 67] and nuPlan [68] that offer continuous vehicle dynamics beyond the kinematic model used here. AC K N OW L E D G M E N T This work has received funding from the European Union's Horizon Europe research and innovation programme under the Marie Skłodowska-Curie grant agreement No. 101126636. RE F E R E N C E S [1] Y . Hu, J. Yang, L. Chen, K. Li, C. Sima, X. Zhu, S. Chai, S."},{"citing_arxiv_id":"2603.23607","ref_index":10,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"LongTail Driving Scenarios with Reasoning Traces: The KITScenes LongTail Dataset","primary_cat":"cs.CV","submitted_at":"2026-03-24T18:00:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"KITScenes LongTail supplies multimodal driving data and multilingual expert reasoning traces to benchmark models on rare scenarios beyond basic safety metrics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.22801","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Unleashing the Potential of Diffusion Models for End-to-End Autonomous Driving","primary_cat":"cs.RO","submitted_at":"2026-02-26T09:37:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The paper introduces Hyper Diffusion Planner (HDP), a diffusion-based E2E AD framework that identifies insights on loss space, trajectory representation and data scaling, adds RL post-training, and reports 10x performance gains over 200 km of real-world testing across 6 scenarios.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.23421","ref_index":10,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DriveLaW:Unifying Planning and Video Generation in a Latent Driving World","primary_cat":"cs.CV","submitted_at":"2025-12-29T12:32:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DriveLaW unifies video world modeling and trajectory planning by injecting video-generator latents into a diffusion planner, achieving SOTA video prediction and a new record on the NAVSIM planning benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.07661","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Optimization-Guided Diffusion for Interactive Scene Generation","primary_cat":"cs.CV","submitted_at":"2025-12-08T15:56:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"OMEGA guides diffusion sampling with per-step constrained optimization and game-theoretic adversarial modeling to generate physically valid and interactive driving scenes, raising valid scene ratios from 32% to 72% and producing 5x more near-collisions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.04049","ref_index":57,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DIVER: Reinforced Diffusion Breaks Imitation Bottlenecks in End-to-End Autonomous Driving","primary_cat":"cs.CV","submitted_at":"2025-07-05T14:19:19+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2506.08052","ref_index":6,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ReCogDrive: A Reinforced Cognitive Framework for End-to-End Autonomous Driving","primary_cat":"cs.CV","submitted_at":"2025-06-09T03:14:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ReCogDrive unifies VLM scene understanding with a diffusion planner reinforced by DiffGRPO to reach state-of-the-art results on NAVSIM and Bench2Drive benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2506.00560","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Using Ensemble Diffusion to Estimate Uncertainty for End-to-End Autonomous Driving","primary_cat":"cs.RO","submitted_at":"2025-05-31T13:33:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"EnDfuser replaces point-estimate trajectory planning with ensemble diffusion in a single attention-pooling transformer module to model posterior trajectory uncertainty and improve safety in end-to-end autonomous driving.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2505.17209","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LiloDriver: A Lifelong Learning Framework for Closed-loop Motion Planning in Long-tail Autonomous Driving Scenarios","primary_cat":"cs.RO","submitted_at":"2025-05-22T18:33:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LiloDriver uses LLMs and memory-augmented planning in a four-stage pipeline to outperform rule-based and learning-based methods on both common and rare scenarios in the nuPlan benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2504.02450","ref_index":19,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CHARMS: A Cognitive Hierarchical Agent for Reasoning and Motion Stylization in Autonomous Driving","primary_cat":"cs.RO","submitted_at":"2025-04-03T10:15:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CHARMS applies Level-k game theory and Poisson cognitive hierarchy theory to autonomous driving agents via a two-stage RL-then-SFT pipeline for human-like decisions and realistic scenario generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.08481","ref_index":1,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Enhancing End-to-End Autonomous Driving with Latent World Model","primary_cat":"cs.CV","submitted_at":"2024-06-12T17:59:21+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LAW introduces a self-supervised prediction task on latent scene features that boosts end-to-end driving performance on nuScenes, NAVSIM, and CARLA benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.06978","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Hydra-MDP: End-to-end Multimodal Planning with Multi-target Hydra-Distillation","primary_cat":"cs.CV","submitted_at":"2024-06-11T06:18:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Hydra-MDP uses multi-teacher distillation and a multi-head decoder to learn diverse, metric-specific trajectories in an end-to-end autonomous-driving planner, winning the Navsim challenge.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}