{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:XFLLO2OX2NLWL33DMXZZH4QV5P","short_pith_number":"pith:XFLLO2OX","canonical_record":{"source":{"id":"1703.02702","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-08T04:58:51Z","cross_cats_sorted":["cs.AI","cs.MA","cs.RO"],"title_canon_sha256":"7be0479d91666ac67f703f9d81265ef867d14aa3c95a0be5833b60ef6ec8d3f6","abstract_canon_sha256":"f23388eb4880ebeea887ea458f5099f59cdf740d15a688467fbe81e718a80a54"},"schema_version":"1.0"},"canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","source":{"kind":"arxiv","id":"1703.02702","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.02702","created_at":"2026-05-18T00:49:05Z"},{"alias_kind":"arxiv_version","alias_value":"1703.02702v1","created_at":"2026-05-18T00:49:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.02702","created_at":"2026-05-18T00:49:05Z"},{"alias_kind":"pith_short_12","alias_value":"XFLLO2OX2NLW","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_16","alias_value":"XFLLO2OX2NLWL33D","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_8","alias_value":"XFLLO2OX","created_at":"2026-05-18T12:31:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:XFLLO2OX2NLWL33DMXZZH4QV5P","target":"record","payload":{"canonical_record":{"source":{"id":"1703.02702","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-08T04:58:51Z","cross_cats_sorted":["cs.AI","cs.MA","cs.RO"],"title_canon_sha256":"7be0479d91666ac67f703f9d81265ef867d14aa3c95a0be5833b60ef6ec8d3f6","abstract_canon_sha256":"f23388eb4880ebeea887ea458f5099f59cdf740d15a688467fbe81e718a80a54"},"schema_version":"1.0"},"canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:49:05.832769Z","signature_b64":"Q4/OtZZxXC3KXiDEdVkWxIfBUTmuQMEbiGAmhlnlY96Zw9PHnan6v786MnmcPFwfSgyd7W7WIQsINBYZgxBeAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","last_reissued_at":"2026-05-18T00:49:05.832246Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:49:05.832246Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1703.02702","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:49:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"d5PlXFvLcCt5cSNU77Lh//uCQwKVMYg4K4mBZ19YpCo3TQou+NJ4VUQf8PWEgYKLZRvnHTFDeucksQrU9jU7DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T06:49:28.997330Z"},"content_sha256":"8c973ddcb94ae42f68c16c17fe072da60c98a383c329c599f1843d81028afe2f","schema_version":"1.0","event_id":"sha256:8c973ddcb94ae42f68c16c17fe072da60c98a383c329c599f1843d81028afe2f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:XFLLO2OX2NLWL33DMXZZH4QV5P","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Robust Adversarial Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","cs.RO"],"primary_cat":"cs.LG","authors_text":"Abhinav Gupta, James Davidson, Lerrel Pinto, Rahul Sukthankar","submitted_at":"2017-03-08T04:58:51Z","abstract_excerpt":"Deep neural networks coupled with fast simulation and improved computation have led to recent successes in the field of reinforcement learning (RL). However, most current RL-based approaches fail to generalize since: (a) the gap between simulation and real world is so large that policy-learning approaches fail to transfer; (b) even if policy learning is done in real world, the data scarcity leads to failed generalization from training to test scenarios (e.g., due to different friction or object masses). Inspired from H-infinity control methods, we note that both modeling errors and differences"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.02702","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:49:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gQ2B8jAH6lcJupypQLEIirjVeRhNUkoKoSt8Gfro3WzRT4zR7uIKXzrjtTgmEUC/D5Lplqh86gIgNJEDySNPDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T06:49:28.997686Z"},"content_sha256":"6b83524c59cfec4c4feee06ff392e9285770fcac311cf4c9911c42716f148ac2","schema_version":"1.0","event_id":"sha256:6b83524c59cfec4c4feee06ff392e9285770fcac311cf4c9911c42716f148ac2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/bundle.json","state_url":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T06:49:28Z","links":{"resolver":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P","bundle":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/bundle.json","state":"https://pith.science/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XFLLO2OX2NLWL33DMXZZH4QV5P/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:XFLLO2OX2NLWL33DMXZZH4QV5P","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f23388eb4880ebeea887ea458f5099f59cdf740d15a688467fbe81e718a80a54","cross_cats_sorted":["cs.AI","cs.MA","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-08T04:58:51Z","title_canon_sha256":"7be0479d91666ac67f703f9d81265ef867d14aa3c95a0be5833b60ef6ec8d3f6"},"schema_version":"1.0","source":{"id":"1703.02702","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.02702","created_at":"2026-05-18T00:49:05Z"},{"alias_kind":"arxiv_version","alias_value":"1703.02702v1","created_at":"2026-05-18T00:49:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.02702","created_at":"2026-05-18T00:49:05Z"},{"alias_kind":"pith_short_12","alias_value":"XFLLO2OX2NLW","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_16","alias_value":"XFLLO2OX2NLWL33D","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_8","alias_value":"XFLLO2OX","created_at":"2026-05-18T12:31:53Z"}],"graph_snapshots":[{"event_id":"sha256:6b83524c59cfec4c4feee06ff392e9285770fcac311cf4c9911c42716f148ac2","target":"graph","created_at":"2026-05-18T00:49:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep neural networks coupled with fast simulation and improved computation have led to recent successes in the field of reinforcement learning (RL). However, most current RL-based approaches fail to generalize since: (a) the gap between simulation and real world is so large that policy-learning approaches fail to transfer; (b) even if policy learning is done in real world, the data scarcity leads to failed generalization from training to test scenarios (e.g., due to different friction or object masses). Inspired from H-infinity control methods, we note that both modeling errors and differences","authors_text":"Abhinav Gupta, James Davidson, Lerrel Pinto, Rahul Sukthankar","cross_cats":["cs.AI","cs.MA","cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-08T04:58:51Z","title":"Robust Adversarial Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.02702","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8c973ddcb94ae42f68c16c17fe072da60c98a383c329c599f1843d81028afe2f","target":"record","created_at":"2026-05-18T00:49:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f23388eb4880ebeea887ea458f5099f59cdf740d15a688467fbe81e718a80a54","cross_cats_sorted":["cs.AI","cs.MA","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-08T04:58:51Z","title_canon_sha256":"7be0479d91666ac67f703f9d81265ef867d14aa3c95a0be5833b60ef6ec8d3f6"},"schema_version":"1.0","source":{"id":"1703.02702","kind":"arxiv","version":1}},"canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b956b769d7d35765ef6365f393f215ebdee5982b63723a28dd1c3fff2ed2fc41","first_computed_at":"2026-05-18T00:49:05.832246Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:49:05.832246Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Q4/OtZZxXC3KXiDEdVkWxIfBUTmuQMEbiGAmhlnlY96Zw9PHnan6v786MnmcPFwfSgyd7W7WIQsINBYZgxBeAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:49:05.832769Z","signed_message":"canonical_sha256_bytes"},"source_id":"1703.02702","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8c973ddcb94ae42f68c16c17fe072da60c98a383c329c599f1843d81028afe2f","sha256:6b83524c59cfec4c4feee06ff392e9285770fcac311cf4c9911c42716f148ac2"],"state_sha256":"bdaea6a686d65924992ee492955cd9784186a66a6f39825e29a954fddd59967c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a3FWROlIYd2Kn5DPlCPJ5XxBZnAt6cT0XMWxIC6Rd9jBYxqlTMUx3e0g9COfpPV4QJhFGqWaKouT4hKxPSbVAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T06:49:28.999717Z","bundle_sha256":"e369225e118fbf849d15ffc813527f2f802f9ff401769bbd78eb1c04898017c8"}}