{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:HLGTIBNH5E3A5OFTH4JBV645B7","short_pith_number":"pith:HLGTIBNH","schema_version":"1.0","canonical_sha256":"3acd3405a7e9360eb8b33f121afb9d0fea03beef93db0c50575f4fba1a6f1e5f","source":{"kind":"arxiv","id":"1903.06638","version":1},"attestation_state":"computed","paper":{"title":"TrojDRL: Trojan Attacks on Deep Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CR","authors_text":"Kacper Wardega, Panagiota Kiourti, Susmit Jha, Wenchao Li","submitted_at":"2019-03-01T04:17:32Z","abstract_excerpt":"Recent work has identified that classification models implemented as neural networks are vulnerable to data-poisoning and Trojan attacks at training time. In this work, we show that these training-time vulnerabilities extend to deep reinforcement learning (DRL) agents and can be exploited by an adversary with access to the training process. In particular, we focus on Trojan attacks that augment the function of reinforcement learning policies with hidden behaviors. We demonstrate that such attacks can be implemented through minuscule data poisoning (as little as 0.025% of the training data) and"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.06638","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2019-03-01T04:17:32Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"e978e45e85679d537502a675ae7933356a105ab626bb7cb98fe5208bcdf5a3a7","abstract_canon_sha256":"46025c5aad3af83f8059a46fe745eff79c1bced11dbead972962163710d1fed9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:51:10.263664Z","signature_b64":"0hr4ijDX+6Hbsil8ZAVig0y/hBVve7D/GlEQrrr+9ScbOvW01dyvpvnnTCvoBFilfWTV1iDAOQSmwWWQ2mVgDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3acd3405a7e9360eb8b33f121afb9d0fea03beef93db0c50575f4fba1a6f1e5f","last_reissued_at":"2026-05-17T23:51:10.263063Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:51:10.263063Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TrojDRL: Trojan Attacks on Deep Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CR","authors_text":"Kacper Wardega, Panagiota Kiourti, Susmit Jha, Wenchao Li","submitted_at":"2019-03-01T04:17:32Z","abstract_excerpt":"Recent work has identified that classification models implemented as neural networks are vulnerable to data-poisoning and Trojan attacks at training time. In this work, we show that these training-time vulnerabilities extend to deep reinforcement learning (DRL) agents and can be exploited by an adversary with access to the training process. In particular, we focus on Trojan attacks that augment the function of reinforcement learning policies with hidden behaviors. We demonstrate that such attacks can be implemented through minuscule data poisoning (as little as 0.025% of the training data) and"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.06638","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.06638","created_at":"2026-05-17T23:51:10.263156+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.06638v1","created_at":"2026-05-17T23:51:10.263156+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.06638","created_at":"2026-05-17T23:51:10.263156+00:00"},{"alias_kind":"pith_short_12","alias_value":"HLGTIBNH5E3A","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"HLGTIBNH5E3A5OFT","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"HLGTIBNH","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2601.14104","citing_title":"When Backdoors Meet Partial Observability: Attacking Real-World Reinforcement Learning","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05977","citing_title":"BehaviorGuard: Online Backdoor Defense for Deep Reinforcement Learning","ref_index":18,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7","json":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7.json","graph_json":"https://pith.science/api/pith-number/HLGTIBNH5E3A5OFTH4JBV645B7/graph.json","events_json":"https://pith.science/api/pith-number/HLGTIBNH5E3A5OFTH4JBV645B7/events.json","paper":"https://pith.science/paper/HLGTIBNH"},"agent_actions":{"view_html":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7","download_json":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7.json","view_paper":"https://pith.science/paper/HLGTIBNH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.06638&json=true","fetch_graph":"https://pith.science/api/pith-number/HLGTIBNH5E3A5OFTH4JBV645B7/graph.json","fetch_events":"https://pith.science/api/pith-number/HLGTIBNH5E3A5OFTH4JBV645B7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7/action/storage_attestation","attest_author":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7/action/author_attestation","sign_citation":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7/action/citation_signature","submit_replication":"https://pith.science/pith/HLGTIBNH5E3A5OFTH4JBV645B7/action/replication_record"}},"created_at":"2026-05-17T23:51:10.263156+00:00","updated_at":"2026-05-17T23:51:10.263156+00:00"}