{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:SMWJTN7SBPBDP6FPOZOLT3N3GE","short_pith_number":"pith:SMWJTN7S","schema_version":"1.0","canonical_sha256":"932c99b7f20bc237f8af765cb9edbb31161ba3dedfea4652f670116cf464ab74","source":{"kind":"arxiv","id":"1905.04716","version":1},"attestation_state":"computed","paper":{"title":"Diagnosing Reinforcement Learning for Traffic Signal Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Guanjie Zheng, Hua Wei, Kai Xu, Nan Xu, Vikash Gayah, Xinshi Zang, Zhengyao Yu, Zhenhui Li","submitted_at":"2019-05-12T13:03:23Z","abstract_excerpt":"With the increasing availability of traffic data and advance of deep reinforcement learning techniques, there is an emerging trend of employing reinforcement learning (RL) for traffic signal control. A key question for applying RL to traffic signal control is how to define the reward and state. The ultimate objective in traffic signal control is to minimize the travel time, which is difficult to reach directly. Hence, existing studies often define reward as an ad-hoc weighted linear combination of several traffic measures. However, there is no guarantee that the travel time will be optimized w"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1905.04716","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-12T13:03:23Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3f7adc19538c24e3a20f25c9eec9de344e28a4e44bbdc194ad5405e8cc3cf768","abstract_canon_sha256":"c8062a5f7dedd1380c5a581769ba54d3aa7be327ceee091f11a27f4f814d2cd0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:23.816541Z","signature_b64":"KG14xvg5x8K7d3XKwzKFNkB3aBS7LLsfaerlMv8dGqQ0CwMp4J3vH+IsIb+m7pLhgXsnT/hjynQ709tuvUuGBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"932c99b7f20bc237f8af765cb9edbb31161ba3dedfea4652f670116cf464ab74","last_reissued_at":"2026-05-17T23:46:23.815841Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:23.815841Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Diagnosing Reinforcement Learning for Traffic Signal Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Guanjie Zheng, Hua Wei, Kai Xu, Nan Xu, Vikash Gayah, Xinshi Zang, Zhengyao Yu, Zhenhui Li","submitted_at":"2019-05-12T13:03:23Z","abstract_excerpt":"With the increasing availability of traffic data and advance of deep reinforcement learning techniques, there is an emerging trend of employing reinforcement learning (RL) for traffic signal control. A key question for applying RL to traffic signal control is how to define the reward and state. The ultimate objective in traffic signal control is to minimize the travel time, which is difficult to reach directly. Hence, existing studies often define reward as an ad-hoc weighted linear combination of several traffic measures. However, there is no guarantee that the travel time will be optimized w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.04716","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1905.04716","created_at":"2026-05-17T23:46:23.815953+00:00"},{"alias_kind":"arxiv_version","alias_value":"1905.04716v1","created_at":"2026-05-17T23:46:23.815953+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.04716","created_at":"2026-05-17T23:46:23.815953+00:00"},{"alias_kind":"pith_short_12","alias_value":"SMWJTN7SBPBD","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"SMWJTN7SBPBDP6FP","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"SMWJTN7S","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE","json":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE.json","graph_json":"https://pith.science/api/pith-number/SMWJTN7SBPBDP6FPOZOLT3N3GE/graph.json","events_json":"https://pith.science/api/pith-number/SMWJTN7SBPBDP6FPOZOLT3N3GE/events.json","paper":"https://pith.science/paper/SMWJTN7S"},"agent_actions":{"view_html":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE","download_json":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE.json","view_paper":"https://pith.science/paper/SMWJTN7S","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1905.04716&json=true","fetch_graph":"https://pith.science/api/pith-number/SMWJTN7SBPBDP6FPOZOLT3N3GE/graph.json","fetch_events":"https://pith.science/api/pith-number/SMWJTN7SBPBDP6FPOZOLT3N3GE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE/action/storage_attestation","attest_author":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE/action/author_attestation","sign_citation":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE/action/citation_signature","submit_replication":"https://pith.science/pith/SMWJTN7SBPBDP6FPOZOLT3N3GE/action/replication_record"}},"created_at":"2026-05-17T23:46:23.815953+00:00","updated_at":"2026-05-17T23:46:23.815953+00:00"}