{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:MZ2Q6ZWWEBZ2M4XGYHF72E5SOH","short_pith_number":"pith:MZ2Q6ZWW","canonical_record":{"source":{"id":"2503.07608","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:59:42Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"ffbef9e71aead583bc8fd0ecc333658dd73e98f7124c6a20764b1750183a9904","abstract_canon_sha256":"251251be26a88dc4ba247abe36d2d3fb3589f84bb6d3ebad2a96ee22e776a2fb"},"schema_version":"1.0"},"canonical_sha256":"66750f66d62073a672e6c1cbfd13b271d5568d74846ccc3c164489e69622a1e9","source":{"kind":"arxiv","id":"2503.07608","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2503.07608","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2503.07608v1","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.07608","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"MZ2Q6ZWWEBZ2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"MZ2Q6ZWWEBZ2M4XG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"MZ2Q6ZWW","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:MZ2Q6ZWWEBZ2M4XGYHF72E5SOH","target":"record","payload":{"canonical_record":{"source":{"id":"2503.07608","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:59:42Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"ffbef9e71aead583bc8fd0ecc333658dd73e98f7124c6a20764b1750183a9904","abstract_canon_sha256":"251251be26a88dc4ba247abe36d2d3fb3589f84bb6d3ebad2a96ee22e776a2fb"},"schema_version":"1.0"},"canonical_sha256":"66750f66d62073a672e6c1cbfd13b271d5568d74846ccc3c164489e69622a1e9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.739040Z","signature_b64":"Ogl9N7mZ0au+lknR6+T89Dx0a5IZoaNfXd0YuwxTL1O/PBjeLZ7Uf4QTKzcD2O+c8DLH3DGAnCvM0FpNbLoIAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"66750f66d62073a672e6c1cbfd13b271d5568d74846ccc3c164489e69622a1e9","last_reissued_at":"2026-05-17T23:38:46.738477Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.738477Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2503.07608","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LX9alHeK8M7JB7nocJ/ifmEaksccZmG8uixR3iE4dQjLlBGVX/BZnC0LO3XoWo/ulovQJBB4MzI92aD8ZtMbBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T03:03:11.843349Z"},"content_sha256":"ea93d444c44d4cf3565a754f104e474e26c7d48f4186e44500ed98f8296241dd","schema_version":"1.0","event_id":"sha256:ea93d444c44d4cf3565a754f104e474e26c7d48f4186e44500ed98f8296241dd"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:MZ2Q6ZWWEBZ2M4XGYHF72E5SOH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"AlphaDrive: Unleashing the Power of VLMs in Autonomous Driving via Reinforcement Learning and Reasoning","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"Reinforcement learning with tailored rewards and a two-stage strategy improves vision-language models for autonomous driving planning.","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Bo Jiang, Qian Zhang, Shaoyu Chen, Wenyu Liu, Xinggang Wang","submitted_at":"2025-03-10T17:59:42Z","abstract_excerpt":"OpenAI o1 and DeepSeek R1 achieve or even surpass human expert-level performance in complex domains like mathematics and science, with reinforcement learning (RL) and reasoning playing a crucial role. In autonomous driving, recent end-to-end models have greatly improved planning performance but still struggle with long-tailed problems due to limited common sense and reasoning abilities. Some studies integrate vision-language models (VLMs) into autonomous driving, but they typically rely on pre-trained models with simple supervised fine-tuning (SFT) on driving data, without further exploration "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"AlphaDrive significantly improves both planning performance and training efficiency compared to using only SFT or without reasoning, and following RL training exhibits emergent multimodal planning capabilities.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the four GRPO-based RL rewards and two-stage training strategy produce generalizable, safe improvements on real-world driving data rather than overfitting to the training distribution.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"AlphaDrive uses GRPO-based RL rewards and two-stage SFT+RL training on VLMs to improve autonomous driving planning performance and efficiency while producing emergent multimodal capabilities.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Reinforcement learning with tailored rewards and a two-stage strategy improves vision-language models for autonomous driving planning.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"0f635f6f218a8fe37b2ed36a843b8b0ed939e4aa0dd1f623f4fea8311164831e"},"source":{"id":"2503.07608","kind":"arxiv","version":1},"verdict":{"id":"8b8c67fa-e1a6-49b9-b03a-3a5416fde7cf","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T20:03:30.788130Z","strongest_claim":"AlphaDrive significantly improves both planning performance and training efficiency compared to using only SFT or without reasoning, and following RL training exhibits emergent multimodal planning capabilities.","one_line_summary":"AlphaDrive uses GRPO-based RL rewards and two-stage SFT+RL training on VLMs to improve autonomous driving planning performance and efficiency while producing emergent multimodal capabilities.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the four GRPO-based RL rewards and two-stage training strategy produce generalizable, safe improvements on real-world driving data rather than overfitting to the training distribution.","pith_extraction_headline":"Reinforcement learning with tailored rewards and a two-stage strategy improves vision-language models for autonomous driving planning."},"references":{"count":49,"sample":[{"doi":"","year":null,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","ref_index":1,"cited_arxiv_id":"2303.08774","is_internal_anchor":true},{"doi":"","year":2022,"title":"Flamingo: a visual language model for few-shot learning","work_id":"2d29aa49-7f72-4532-8c66-e33ed3d6d8a8","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond","work_id":"cbc2bb21-b6bb-46c0-80bf-107e195ffe10","ref_index":3,"cited_arxiv_id":"2308.12966","is_internal_anchor":true},{"doi":"","year":2005,"title":"Meteor: An automatic metric for mt evaluation with improved correlation with hu- man judgments","work_id":"90de9967-cc22-427f-91fb-ed50f063376c","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control","work_id":"ff438a8a-8003-4fae-9131-acd418b3597b","ref_index":5,"cited_arxiv_id":"2307.15818","is_internal_anchor":true}],"resolved_work":49,"snapshot_sha256":"a80272b7b288b32d0835e4f514004ed6eafe32c0ace5e6a9eca451ddc76446f5","internal_anchors":16},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4050fc7d31763a3a0ce57228bfdcbd91b98fa13dafc1b3731d305dae64b84142"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"8b8c67fa-e1a6-49b9-b03a-3a5416fde7cf"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nhGvdHN075YxYZ7145161HdKPRfLWTxnXLfiZN32pmUvQZRy/ok5Gr99knpMYJ507QNrlpfazB5mGxMbUlFJCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T03:03:11.843905Z"},"content_sha256":"f360aca54e5c40145d80c45f772179087b7d066e2c04188409f498cde39dc4c6","schema_version":"1.0","event_id":"sha256:f360aca54e5c40145d80c45f772179087b7d066e2c04188409f498cde39dc4c6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH/bundle.json","state_url":"https://pith.science/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T03:03:11Z","links":{"resolver":"https://pith.science/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH","bundle":"https://pith.science/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH/bundle.json","state":"https://pith.science/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MZ2Q6ZWWEBZ2M4XGYHF72E5SOH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:MZ2Q6ZWWEBZ2M4XGYHF72E5SOH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"251251be26a88dc4ba247abe36d2d3fb3589f84bb6d3ebad2a96ee22e776a2fb","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:59:42Z","title_canon_sha256":"ffbef9e71aead583bc8fd0ecc333658dd73e98f7124c6a20764b1750183a9904"},"schema_version":"1.0","source":{"id":"2503.07608","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2503.07608","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2503.07608v1","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.07608","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"MZ2Q6ZWWEBZ2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"MZ2Q6ZWWEBZ2M4XG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"MZ2Q6ZWW","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:f360aca54e5c40145d80c45f772179087b7d066e2c04188409f498cde39dc4c6","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"AlphaDrive significantly improves both planning performance and training efficiency compared to using only SFT or without reasoning, and following RL training exhibits emergent multimodal planning capabilities."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the four GRPO-based RL rewards and two-stage training strategy produce generalizable, safe improvements on real-world driving data rather than overfitting to the training distribution."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"AlphaDrive uses GRPO-based RL rewards and two-stage SFT+RL training on VLMs to improve autonomous driving planning performance and efficiency while producing emergent multimodal capabilities."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning with tailored rewards and a two-stage strategy improves vision-language models for autonomous driving planning."}],"snapshot_sha256":"0f635f6f218a8fe37b2ed36a843b8b0ed939e4aa0dd1f623f4fea8311164831e"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4050fc7d31763a3a0ce57228bfdcbd91b98fa13dafc1b3731d305dae64b84142"},"paper":{"abstract_excerpt":"OpenAI o1 and DeepSeek R1 achieve or even surpass human expert-level performance in complex domains like mathematics and science, with reinforcement learning (RL) and reasoning playing a crucial role. In autonomous driving, recent end-to-end models have greatly improved planning performance but still struggle with long-tailed problems due to limited common sense and reasoning abilities. Some studies integrate vision-language models (VLMs) into autonomous driving, but they typically rely on pre-trained models with simple supervised fine-tuning (SFT) on driving data, without further exploration ","authors_text":"Bo Jiang, Qian Zhang, Shaoyu Chen, Wenyu Liu, Xinggang Wang","cross_cats":["cs.RO"],"headline":"Reinforcement learning with tailored rewards and a two-stage strategy improves vision-language models for autonomous driving planning.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:59:42Z","title":"AlphaDrive: Unleashing the Power of VLMs in Autonomous Driving via Reinforcement Learning and Reasoning"},"references":{"count":49,"internal_anchors":16,"resolved_work":49,"sample":[{"cited_arxiv_id":"2303.08774","doi":"","is_internal_anchor":true,"ref_index":1,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Flamingo: a visual language model for few-shot learning","work_id":"2d29aa49-7f72-4532-8c66-e33ed3d6d8a8","year":2022},{"cited_arxiv_id":"2308.12966","doi":"","is_internal_anchor":true,"ref_index":3,"title":"Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond","work_id":"cbc2bb21-b6bb-46c0-80bf-107e195ffe10","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Meteor: An automatic metric for mt evaluation with improved correlation with hu- man judgments","work_id":"90de9967-cc22-427f-91fb-ed50f063376c","year":2005},{"cited_arxiv_id":"2307.15818","doi":"","is_internal_anchor":true,"ref_index":5,"title":"RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control","work_id":"ff438a8a-8003-4fae-9131-acd418b3597b","year":2023}],"snapshot_sha256":"a80272b7b288b32d0835e4f514004ed6eafe32c0ace5e6a9eca451ddc76446f5"},"source":{"id":"2503.07608","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-16T20:03:30.788130Z","id":"8b8c67fa-e1a6-49b9-b03a-3a5416fde7cf","model_set":{"reader":"grok-4.3"},"one_line_summary":"AlphaDrive uses GRPO-based RL rewards and two-stage SFT+RL training on VLMs to improve autonomous driving planning performance and efficiency while producing emergent multimodal capabilities.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning with tailored rewards and a two-stage strategy improves vision-language models for autonomous driving planning.","strongest_claim":"AlphaDrive significantly improves both planning performance and training efficiency compared to using only SFT or without reasoning, and following RL training exhibits emergent multimodal planning capabilities.","weakest_assumption":"That the four GRPO-based RL rewards and two-stage training strategy produce generalizable, safe improvements on real-world driving data rather than overfitting to the training distribution."}},"verdict_id":"8b8c67fa-e1a6-49b9-b03a-3a5416fde7cf"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ea93d444c44d4cf3565a754f104e474e26c7d48f4186e44500ed98f8296241dd","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"251251be26a88dc4ba247abe36d2d3fb3589f84bb6d3ebad2a96ee22e776a2fb","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2025-03-10T17:59:42Z","title_canon_sha256":"ffbef9e71aead583bc8fd0ecc333658dd73e98f7124c6a20764b1750183a9904"},"schema_version":"1.0","source":{"id":"2503.07608","kind":"arxiv","version":1}},"canonical_sha256":"66750f66d62073a672e6c1cbfd13b271d5568d74846ccc3c164489e69622a1e9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"66750f66d62073a672e6c1cbfd13b271d5568d74846ccc3c164489e69622a1e9","first_computed_at":"2026-05-17T23:38:46.738477Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.738477Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Ogl9N7mZ0au+lknR6+T89Dx0a5IZoaNfXd0YuwxTL1O/PBjeLZ7Uf4QTKzcD2O+c8DLH3DGAnCvM0FpNbLoIAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.739040Z","signed_message":"canonical_sha256_bytes"},"source_id":"2503.07608","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ea93d444c44d4cf3565a754f104e474e26c7d48f4186e44500ed98f8296241dd","sha256:f360aca54e5c40145d80c45f772179087b7d066e2c04188409f498cde39dc4c6"],"state_sha256":"87aee4ec795c112732e3a2583c5e9c45288c2279c78100cdc4e6976ba38c4582"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2gn1VCTnJAbmVMSCtmA2BVUkg3Iw9aEqkQof5vNtFZMcw2V0FDggHAHWePMzJq3yS9zjp60sZslsSpHC8XbUAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T03:03:11.847335Z","bundle_sha256":"ee484b3948fdfda6978ce24d1f9221e451ad39246a59e4fe20f175d79cfaf7c4"}}