{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:OVTFYVE7PK55U6NQBTSSUZRZ7K","short_pith_number":"pith:OVTFYVE7","schema_version":"1.0","canonical_sha256":"75665c549f7abbda79b00ce52a6639fab45230fec3dbae578ad17422938d3fb3","source":{"kind":"arxiv","id":"1812.01647","version":1},"attestation_state":"computed","paper":{"title":"Rigorous Agent Evaluation: An Adversarial Approach to Uncover Catastrophic Failures","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ananya Kumar, Avraham Ruderman, Csaba Szepesvari, Jonathan Uesato, Keith Anderson, Krishmamurthy (Dj) Dvijotham, Nicolas Heess, Pushmeet Kohli, Tom Erez","submitted_at":"2018-12-04T19:39:53Z","abstract_excerpt":"This paper addresses the problem of evaluating learning systems in safety critical domains such as autonomous driving, where failures can have catastrophic consequences. We focus on two problems: searching for scenarios when learned agents fail and assessing their probability of failure. The standard method for agent evaluation in reinforcement learning, Vanilla Monte Carlo, can miss failures entirely, leading to the deployment of unsafe agents. We demonstrate this is an issue for current agents, where even matching the compute used for training is sometimes insufficient for evaluation. To add"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.01647","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-04T19:39:53Z","cross_cats_sorted":["cs.CR","stat.ML"],"title_canon_sha256":"7a75396250e799f548a48e58739f29b06523483491e7856e7211b0e08051d02f","abstract_canon_sha256":"824e70075aff97c97f3f57aa829db2ab6d643f568c3bc6006c4ea24f79167936"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:00.998106Z","signature_b64":"3xlOYCcXR+KKgUbC0Aru/q5A9/8TNdzZP8oX/X/gm6LyiWSiusVd9tSw3H8j53rduvRIXXmLl1ZT2LAklEyYAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"75665c549f7abbda79b00ce52a6639fab45230fec3dbae578ad17422938d3fb3","last_reissued_at":"2026-05-17T23:59:00.997639Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:00.997639Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Rigorous Agent Evaluation: An Adversarial Approach to Uncover Catastrophic Failures","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ananya Kumar, Avraham Ruderman, Csaba Szepesvari, Jonathan Uesato, Keith Anderson, Krishmamurthy (Dj) Dvijotham, Nicolas Heess, Pushmeet Kohli, Tom Erez","submitted_at":"2018-12-04T19:39:53Z","abstract_excerpt":"This paper addresses the problem of evaluating learning systems in safety critical domains such as autonomous driving, where failures can have catastrophic consequences. We focus on two problems: searching for scenarios when learned agents fail and assessing their probability of failure. The standard method for agent evaluation in reinforcement learning, Vanilla Monte Carlo, can miss failures entirely, leading to the deployment of unsafe agents. We demonstrate this is an issue for current agents, where even matching the compute used for training is sometimes insufficient for evaluation. To add"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.01647","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.01647","created_at":"2026-05-17T23:59:00.997720+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.01647v1","created_at":"2026-05-17T23:59:00.997720+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.01647","created_at":"2026-05-17T23:59:00.997720+00:00"},{"alias_kind":"pith_short_12","alias_value":"OVTFYVE7PK55","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"OVTFYVE7PK55U6NQ","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"OVTFYVE7","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2604.08036","citing_title":"PriPG-RL: Privileged Planner-Guided Reinforcement Learning for Partially Observable Systems with Anytime-Feasible MPC","ref_index":11,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19018","citing_title":"Local Linearity of LLMs Enables Activation Steering via Model-Based Linear Optimal Control","ref_index":9,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K","json":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K.json","graph_json":"https://pith.science/api/pith-number/OVTFYVE7PK55U6NQBTSSUZRZ7K/graph.json","events_json":"https://pith.science/api/pith-number/OVTFYVE7PK55U6NQBTSSUZRZ7K/events.json","paper":"https://pith.science/paper/OVTFYVE7"},"agent_actions":{"view_html":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K","download_json":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K.json","view_paper":"https://pith.science/paper/OVTFYVE7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.01647&json=true","fetch_graph":"https://pith.science/api/pith-number/OVTFYVE7PK55U6NQBTSSUZRZ7K/graph.json","fetch_events":"https://pith.science/api/pith-number/OVTFYVE7PK55U6NQBTSSUZRZ7K/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K/action/storage_attestation","attest_author":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K/action/author_attestation","sign_citation":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K/action/citation_signature","submit_replication":"https://pith.science/pith/OVTFYVE7PK55U6NQBTSSUZRZ7K/action/replication_record"}},"created_at":"2026-05-17T23:59:00.997720+00:00","updated_at":"2026-05-17T23:59:00.997720+00:00"}