{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:5GYGPF6OU6TLC3NX7WZJREWADD","short_pith_number":"pith:5GYGPF6O","schema_version":"1.0","canonical_sha256":"e9b06797cea7a6b16db7fdb29892c018f1f8a91728ed07460ceafd697cc3e110","source":{"kind":"arxiv","id":"1812.06120","version":2},"attestation_state":"computed","paper":{"title":"Simulation to Scaled City: Zero-Shot Policy Transfer for Traffic Control via Autonomous Vehicles","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.SY","authors_text":"Alexandre Bayen, Andreas Malikopoulos, Behdad Chalaki, Ben Remer, Eugene Vinitsky, Kathy Jang, Logan Beaver","submitted_at":"2018-12-14T19:20:09Z","abstract_excerpt":"Using deep reinforcement learning, we train control policies for autonomous vehicles leading a platoon of vehicles onto a roundabout. Using Flow, a library for deep reinforcement learning in micro-simulators, we train two policies, one policy with noise injected into the state and action space and one without any injected noise. In simulation, the autonomous vehicle learns an emergent metering behavior for both policies in which it slows to allow for smoother merging. We then directly transfer this policy without any tuning to the University of Delaware Scaled Smart City (UDSSC), a 1:25 scale "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.06120","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SY","submitted_at":"2018-12-14T19:20:09Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"29f4a4be72821f3b7d5a011be63022f985276d7bafaa183297ec62767871e9a6","abstract_canon_sha256":"1862ffd5cb50148b20c5673bc01b76be12e756886904d7ca3ef045584ed5e30e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:51.272611Z","signature_b64":"Wn+QWsehXcclBFLNeqEzuJdqkD6Do6XnDpBZD+2tRa8GDrdQn2WmUvoEkpy3lbU2En/Dcht10Kv+3zCCIRfxAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e9b06797cea7a6b16db7fdb29892c018f1f8a91728ed07460ceafd697cc3e110","last_reissued_at":"2026-05-17T23:52:51.271856Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:51.271856Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Simulation to Scaled City: Zero-Shot Policy Transfer for Traffic Control via Autonomous Vehicles","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.SY","authors_text":"Alexandre Bayen, Andreas Malikopoulos, Behdad Chalaki, Ben Remer, Eugene Vinitsky, Kathy Jang, Logan Beaver","submitted_at":"2018-12-14T19:20:09Z","abstract_excerpt":"Using deep reinforcement learning, we train control policies for autonomous vehicles leading a platoon of vehicles onto a roundabout. Using Flow, a library for deep reinforcement learning in micro-simulators, we train two policies, one policy with noise injected into the state and action space and one without any injected noise. In simulation, the autonomous vehicle learns an emergent metering behavior for both policies in which it slows to allow for smoother merging. We then directly transfer this policy without any tuning to the University of Delaware Scaled Smart City (UDSSC), a 1:25 scale "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.06120","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.06120","created_at":"2026-05-17T23:52:51.271984+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.06120v2","created_at":"2026-05-17T23:52:51.271984+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.06120","created_at":"2026-05-17T23:52:51.271984+00:00"},{"alias_kind":"pith_short_12","alias_value":"5GYGPF6OU6TL","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_16","alias_value":"5GYGPF6OU6TLC3NX","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_8","alias_value":"5GYGPF6O","created_at":"2026-05-18T12:32:08.215937+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2312.09436","citing_title":"Temporal Transfer Learning for Traffic Optimization with Coarse-grained Advisory Autonomy","ref_index":10,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD","json":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD.json","graph_json":"https://pith.science/api/pith-number/5GYGPF6OU6TLC3NX7WZJREWADD/graph.json","events_json":"https://pith.science/api/pith-number/5GYGPF6OU6TLC3NX7WZJREWADD/events.json","paper":"https://pith.science/paper/5GYGPF6O"},"agent_actions":{"view_html":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD","download_json":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD.json","view_paper":"https://pith.science/paper/5GYGPF6O","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.06120&json=true","fetch_graph":"https://pith.science/api/pith-number/5GYGPF6OU6TLC3NX7WZJREWADD/graph.json","fetch_events":"https://pith.science/api/pith-number/5GYGPF6OU6TLC3NX7WZJREWADD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD/action/storage_attestation","attest_author":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD/action/author_attestation","sign_citation":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD/action/citation_signature","submit_replication":"https://pith.science/pith/5GYGPF6OU6TLC3NX7WZJREWADD/action/replication_record"}},"created_at":"2026-05-17T23:52:51.271984+00:00","updated_at":"2026-05-17T23:52:51.271984+00:00"}