{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:XFLLO2OX2NLWL33DMXZZH4QV5P","short_pith_number":"pith:XFLLO2OX","schema_version":"1.0","canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","source":{"kind":"arxiv","id":"1703.02702","version":1},"attestation_state":"computed","paper":{"title":"Robust Adversarial Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","cs.RO"],"primary_cat":"cs.LG","authors_text":"Abhinav Gupta, James Davidson, Lerrel Pinto, Rahul Sukthankar","submitted_at":"2017-03-08T04:58:51Z","abstract_excerpt":"Deep neural networks coupled with fast simulation and improved computation have led to recent successes in the field of reinforcement learning (RL). However, most current RL-based approaches fail to generalize since: (a) the gap between simulation and real world is so large that policy-learning approaches fail to transfer; (b) even if policy learning is done in real world, the data scarcity leads to failed generalization from training to test scenarios (e.g., due to different friction or object masses). Inspired from H-infinity control methods, we note that both modeling errors and differences"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1703.02702","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-08T04:58:51Z","cross_cats_sorted":["cs.AI","cs.MA","cs.RO"],"title_canon_sha256":"7be0479d91666ac67f703f9d81265ef867d14aa3c95a0be5833b60ef6ec8d3f6","abstract_canon_sha256":"f23388eb4880ebeea887ea458f5099f59cdf740d15a688467fbe81e718a80a54"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:49:05.832769Z","signature_b64":"Q4/OtZZxXC3KXiDEdVkWxIfBUTmuQMEbiGAmhlnlY96Zw9PHnan6v786MnmcPFwfSgyd7W7WIQsINBYZgxBeAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","last_reissued_at":"2026-05-18T00:49:05.832246Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:49:05.832246Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Robust Adversarial Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","cs.RO"],"primary_cat":"cs.LG","authors_text":"Abhinav Gupta, James Davidson, Lerrel Pinto, Rahul Sukthankar","submitted_at":"2017-03-08T04:58:51Z","abstract_excerpt":"Deep neural networks coupled with fast simulation and improved computation have led to recent successes in the field of reinforcement learning (RL). However, most current RL-based approaches fail to generalize since: (a) the gap between simulation and real world is so large that policy-learning approaches fail to transfer; (b) even if policy learning is done in real world, the data scarcity leads to failed generalization from training to test scenarios (e.g., due to different friction or object masses). Inspired from H-infinity control methods, we note that both modeling errors and differences"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.02702","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1703.02702","created_at":"2026-05-18T00:49:05.832331+00:00"},{"alias_kind":"arxiv_version","alias_value":"1703.02702v1","created_at":"2026-05-18T00:49:05.832331+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.02702","created_at":"2026-05-18T00:49:05.832331+00:00"},{"alias_kind":"pith_short_12","alias_value":"XFLLO2OX2NLW","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_16","alias_value":"XFLLO2OX2NLWL33D","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_8","alias_value":"XFLLO2OX","created_at":"2026-05-18T12:31:53.515858+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.18803","citing_title":"PROWL: Prioritized Regret-Driven Optimization for World Model Learning","ref_index":25,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P","json":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P.json","graph_json":"https://pith.science/api/pith-number/XFLLO2OX2NLWL33DMXZZH4QV5P/graph.json","events_json":"https://pith.science/api/pith-number/XFLLO2OX2NLWL33DMXZZH4QV5P/events.json","paper":"https://pith.science/paper/XFLLO2OX"},"agent_actions":{"view_html":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P","download_json":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P.json","view_paper":"https://pith.science/paper/XFLLO2OX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1703.02702&json=true","fetch_graph":"https://pith.science/api/pith-number/XFLLO2OX2NLWL33DMXZZH4QV5P/graph.json","fetch_events":"https://pith.science/api/pith-number/XFLLO2OX2NLWL33DMXZZH4QV5P/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/action/storage_attestation","attest_author":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/action/author_attestation","sign_citation":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/action/citation_signature","submit_replication":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/action/replication_record"}},"created_at":"2026-05-18T00:49:05.832331+00:00","updated_at":"2026-05-18T00:49:05.832331+00:00"}