{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:T6K6YKALC5BY24OGCXNXHXEOWY","short_pith_number":"pith:T6K6YKAL","schema_version":"1.0","canonical_sha256":"9f95ec280b17438d71c615db73dc8eb60ac647f8d4bdc369ee49655ed3dec538","source":{"kind":"arxiv","id":"2606.12896","version":1},"attestation_state":"computed","paper":{"title":"PolicyGuard: Towards Test-time and Step-level Adversary Defense for Reinforcement Learning Agent","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CR"],"primary_cat":"cs.LG","authors_text":"Junfeng Guo Heng Huang","submitted_at":"2026-06-11T04:54:28Z","abstract_excerpt":"While real-world applications of reinforcement learning (RL) are becoming increasingly popular, the security of RL systems deserve more attention and exploration. In particular, recent work has revealed that RL agents are vulnerable to backdoor attacks, where a victim agent behaves normally under standard conditions but executes malicious actions when a specific trigger is activated. Existing backdoor defenses for RL either require access to the agent's internal parameters, operate only at the model or trajectory level, or are limited to specific attack types. To ensure the security of RL agen"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12896","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-11T04:54:28Z","cross_cats_sorted":["cs.AI","cs.CR"],"title_canon_sha256":"74371a7260b9a59cd67c0dd43a31fc23ecd6c958120a35061d7ca98188a0158b","abstract_canon_sha256":"8eccb9c476d991272bdbee1cc22eeb749ccdbb802fecece8be8bf063da9f96b5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:08:57.407387Z","signature_b64":"okDVw3Vy895HUWP+SjUUloqEVOp9HxW+ZStnuF/MAbCx0seEgNzwTRTDZoO8GbtbQR8/pPix7iTccA3QN+zZDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9f95ec280b17438d71c615db73dc8eb60ac647f8d4bdc369ee49655ed3dec538","last_reissued_at":"2026-06-12T01:08:57.406654Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:08:57.406654Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PolicyGuard: Towards Test-time and Step-level Adversary Defense for Reinforcement Learning Agent","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CR"],"primary_cat":"cs.LG","authors_text":"Junfeng Guo Heng Huang","submitted_at":"2026-06-11T04:54:28Z","abstract_excerpt":"While real-world applications of reinforcement learning (RL) are becoming increasingly popular, the security of RL systems deserve more attention and exploration. In particular, recent work has revealed that RL agents are vulnerable to backdoor attacks, where a victim agent behaves normally under standard conditions but executes malicious actions when a specific trigger is activated. Existing backdoor defenses for RL either require access to the agent's internal parameters, operate only at the model or trajectory level, or are limited to specific attack types. To ensure the security of RL agen"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12896","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12896/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12896","created_at":"2026-06-12T01:08:57.406756+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12896v1","created_at":"2026-06-12T01:08:57.406756+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12896","created_at":"2026-06-12T01:08:57.406756+00:00"},{"alias_kind":"pith_short_12","alias_value":"T6K6YKALC5BY","created_at":"2026-06-12T01:08:57.406756+00:00"},{"alias_kind":"pith_short_16","alias_value":"T6K6YKALC5BY24OG","created_at":"2026-06-12T01:08:57.406756+00:00"},{"alias_kind":"pith_short_8","alias_value":"T6K6YKAL","created_at":"2026-06-12T01:08:57.406756+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY","json":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY.json","graph_json":"https://pith.science/api/pith-number/T6K6YKALC5BY24OGCXNXHXEOWY/graph.json","events_json":"https://pith.science/api/pith-number/T6K6YKALC5BY24OGCXNXHXEOWY/events.json","paper":"https://pith.science/paper/T6K6YKAL"},"agent_actions":{"view_html":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY","download_json":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY.json","view_paper":"https://pith.science/paper/T6K6YKAL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12896&json=true","fetch_graph":"https://pith.science/api/pith-number/T6K6YKALC5BY24OGCXNXHXEOWY/graph.json","fetch_events":"https://pith.science/api/pith-number/T6K6YKALC5BY24OGCXNXHXEOWY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY/action/storage_attestation","attest_author":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY/action/author_attestation","sign_citation":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY/action/citation_signature","submit_replication":"https://pith.science/pith/T6K6YKALC5BY24OGCXNXHXEOWY/action/replication_record"}},"created_at":"2026-06-12T01:08:57.406756+00:00","updated_at":"2026-06-12T01:08:57.406756+00:00"}