{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:6RF3RVKLZXBGSYI5RFHQ3FD3SR","short_pith_number":"pith:6RF3RVKL","canonical_record":{"source":{"id":"1903.11524","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-27T16:22:48Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"e6de3764e0742475d8e015133b7ecedf0554a408517649335d40f2a4e619f925","abstract_canon_sha256":"4370f4fccac71ca15d5b9668600dffdcf229d42dbe2fbaad93cecee0b4614576"},"schema_version":"1.0"},"canonical_sha256":"f44bb8d54bcdc269611d894f0d947b947fa1174f133b27ec6d0d9eca28841ece","source":{"kind":"arxiv","id":"1903.11524","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.11524","created_at":"2026-05-17T23:50:03Z"},{"alias_kind":"arxiv_version","alias_value":"1903.11524v1","created_at":"2026-05-17T23:50:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.11524","created_at":"2026-05-17T23:50:03Z"},{"alias_kind":"pith_short_12","alias_value":"6RF3RVKLZXBG","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6RF3RVKLZXBGSYI5","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6RF3RVKL","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:6RF3RVKLZXBGSYI5RFHQ3FD3SR","target":"record","payload":{"canonical_record":{"source":{"id":"1903.11524","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-27T16:22:48Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"e6de3764e0742475d8e015133b7ecedf0554a408517649335d40f2a4e619f925","abstract_canon_sha256":"4370f4fccac71ca15d5b9668600dffdcf229d42dbe2fbaad93cecee0b4614576"},"schema_version":"1.0"},"canonical_sha256":"f44bb8d54bcdc269611d894f0d947b947fa1174f133b27ec6d0d9eca28841ece","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:03.067027Z","signature_b64":"oX4lBDkrqIi5wkT6n5m+wq+Sdx5VVusvOxkCW/IyiTTg4UIS+1U6IWDlTgRV7PhMmUiLX0xWwA2+F1nU6BhBCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f44bb8d54bcdc269611d894f0d947b947fa1174f133b27ec6d0d9eca28841ece","last_reissued_at":"2026-05-17T23:50:03.066528Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:03.066528Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.11524","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GnRqPID1ogNLZ6gz1ZGXpRkkw3btuKdEVoUjsCx0AlJzLbpyQTca+GKI6zEvUPfY6/zUbKpvYS9UNzuHFlCsDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T10:54:25.005091Z"},"content_sha256":"594e350621cf2735dd20d4fa5097e43545b50953c103fdb5e76f5fd720704b48","schema_version":"1.0","event_id":"sha256:594e350621cf2735dd20d4fa5097e43545b50953c103fdb5e76f5fd720704b48"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:6RF3RVKLZXBGSYI5RFHQ3FD3SR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Autoregressive Policies for Continuous Control Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"A. Rupam Mahmood, Dmytro Korenkevych, Gautham Vasan, James Bergstra","submitted_at":"2019-03-27T16:22:48Z","abstract_excerpt":"Reinforcement learning algorithms rely on exploration to discover new behaviors, which is typically achieved by following a stochastic policy. In continuous control tasks, policies with a Gaussian distribution have been widely adopted. Gaussian exploration however does not result in smooth trajectories that generally correspond to safe and rewarding behaviors in practical tasks. In addition, Gaussian policies do not result in an effective exploration of an environment and become increasingly inefficient as the action rate increases. This contributes to a low sample efficiency often observed in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.11524","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TPzKWQR3+t8trQuJX2k8z9dseqNODSR8oGqcEUE3efcy9JWh+rrsrEM86KshOPL0IFxmjOozo1lhN5nNO0hDCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T10:54:25.005764Z"},"content_sha256":"3160c0bf4700d56ec12f0b78d18d31fadfe514ec5eacf7a7cc0ed0d280bac80f","schema_version":"1.0","event_id":"sha256:3160c0bf4700d56ec12f0b78d18d31fadfe514ec5eacf7a7cc0ed0d280bac80f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR/bundle.json","state_url":"https://pith.science/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T10:54:25Z","links":{"resolver":"https://pith.science/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR","bundle":"https://pith.science/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR/bundle.json","state":"https://pith.science/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6RF3RVKLZXBGSYI5RFHQ3FD3SR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:6RF3RVKLZXBGSYI5RFHQ3FD3SR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4370f4fccac71ca15d5b9668600dffdcf229d42dbe2fbaad93cecee0b4614576","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-27T16:22:48Z","title_canon_sha256":"e6de3764e0742475d8e015133b7ecedf0554a408517649335d40f2a4e619f925"},"schema_version":"1.0","source":{"id":"1903.11524","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.11524","created_at":"2026-05-17T23:50:03Z"},{"alias_kind":"arxiv_version","alias_value":"1903.11524v1","created_at":"2026-05-17T23:50:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.11524","created_at":"2026-05-17T23:50:03Z"},{"alias_kind":"pith_short_12","alias_value":"6RF3RVKLZXBG","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6RF3RVKLZXBGSYI5","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6RF3RVKL","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:3160c0bf4700d56ec12f0b78d18d31fadfe514ec5eacf7a7cc0ed0d280bac80f","target":"graph","created_at":"2026-05-17T23:50:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning algorithms rely on exploration to discover new behaviors, which is typically achieved by following a stochastic policy. In continuous control tasks, policies with a Gaussian distribution have been widely adopted. Gaussian exploration however does not result in smooth trajectories that generally correspond to safe and rewarding behaviors in practical tasks. In addition, Gaussian policies do not result in an effective exploration of an environment and become increasingly inefficient as the action rate increases. This contributes to a low sample efficiency often observed in","authors_text":"A. Rupam Mahmood, Dmytro Korenkevych, Gautham Vasan, James Bergstra","cross_cats":["cs.AI","cs.RO","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-27T16:22:48Z","title":"Autoregressive Policies for Continuous Control Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.11524","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:594e350621cf2735dd20d4fa5097e43545b50953c103fdb5e76f5fd720704b48","target":"record","created_at":"2026-05-17T23:50:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4370f4fccac71ca15d5b9668600dffdcf229d42dbe2fbaad93cecee0b4614576","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-27T16:22:48Z","title_canon_sha256":"e6de3764e0742475d8e015133b7ecedf0554a408517649335d40f2a4e619f925"},"schema_version":"1.0","source":{"id":"1903.11524","kind":"arxiv","version":1}},"canonical_sha256":"f44bb8d54bcdc269611d894f0d947b947fa1174f133b27ec6d0d9eca28841ece","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f44bb8d54bcdc269611d894f0d947b947fa1174f133b27ec6d0d9eca28841ece","first_computed_at":"2026-05-17T23:50:03.066528Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:03.066528Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"oX4lBDkrqIi5wkT6n5m+wq+Sdx5VVusvOxkCW/IyiTTg4UIS+1U6IWDlTgRV7PhMmUiLX0xWwA2+F1nU6BhBCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:03.067027Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.11524","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:594e350621cf2735dd20d4fa5097e43545b50953c103fdb5e76f5fd720704b48","sha256:3160c0bf4700d56ec12f0b78d18d31fadfe514ec5eacf7a7cc0ed0d280bac80f"],"state_sha256":"b6a6c53167096e31e5db2756d3b771804977dbb4002c020c87707fa59dafff6e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ziq61S80t+FS9pBbxE3JsBCBbujG4Gsnp7PWuHctI0BMWzQ4FdUHD/k6bJmKzS6JyxgdksBp6grWnQWCE5P4Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T10:54:25.009047Z","bundle_sha256":"d62561ea4aa4a241ff7e9df1f2892686a23880f56a7a41d25f3f305ce6ef8e7d"}}