{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:STRRDPGA6UNJ2ZVKKHZQBZC65N","short_pith_number":"pith:STRRDPGA","canonical_record":{"source":{"id":"1603.00748","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T15:28:25Z","cross_cats_sorted":["cs.AI","cs.RO","cs.SY"],"title_canon_sha256":"5ab3cd4c04ef93ad1d5683273651b09a42347d8aefcdfc5ae801a656ef944740","abstract_canon_sha256":"499d2d5de62607d2621f2d3becc68a95078e4a2e97619b5b81f95216ad74153c"},"schema_version":"1.0"},"canonical_sha256":"94e311bcc0f51a9d66aa51f300e45eeb5481e40d32283a9b2695723c58aa6218","source":{"kind":"arxiv","id":"1603.00748","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00748","created_at":"2026-05-18T01:19:43Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00748v1","created_at":"2026-05-18T01:19:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00748","created_at":"2026-05-18T01:19:43Z"},{"alias_kind":"pith_short_12","alias_value":"STRRDPGA6UNJ","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_16","alias_value":"STRRDPGA6UNJ2ZVK","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_8","alias_value":"STRRDPGA","created_at":"2026-05-18T12:30:44Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:STRRDPGA6UNJ2ZVKKHZQBZC65N","target":"record","payload":{"canonical_record":{"source":{"id":"1603.00748","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T15:28:25Z","cross_cats_sorted":["cs.AI","cs.RO","cs.SY"],"title_canon_sha256":"5ab3cd4c04ef93ad1d5683273651b09a42347d8aefcdfc5ae801a656ef944740","abstract_canon_sha256":"499d2d5de62607d2621f2d3becc68a95078e4a2e97619b5b81f95216ad74153c"},"schema_version":"1.0"},"canonical_sha256":"94e311bcc0f51a9d66aa51f300e45eeb5481e40d32283a9b2695723c58aa6218","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:19:43.058969Z","signature_b64":"eAB1NPL1FNTTvr3wRVevFoEO2ac1Bkoj+AxLgVZMEybuAI74nRZvzRCZkDfxNA4eersPSgSHCmd7As1gspFhBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"94e311bcc0f51a9d66aa51f300e45eeb5481e40d32283a9b2695723c58aa6218","last_reissued_at":"2026-05-18T01:19:43.058337Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:19:43.058337Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1603.00748","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:19:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SQh30ktRRj5+XPnVDtgcfqhU+1lY20oMOen2WQGbAl6qokIGbWnyUnWhNHGndOs+u984scnJbwtem6U/inhTDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T22:59:47.274999Z"},"content_sha256":"7ff860c5976fa5b6b03214413d810fd698dae37e7436394bb43ee1f5607c9e17","schema_version":"1.0","event_id":"sha256:7ff860c5976fa5b6b03214413d810fd698dae37e7436394bb43ee1f5607c9e17"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:STRRDPGA6UNJ2ZVKKHZQBZC65N","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Continuous Deep Q-Learning with Model-based Acceleration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","cs.SY"],"primary_cat":"cs.LG","authors_text":"Ilya Sutskever, Sergey Levine, Shixiang Gu, Timothy Lillicrap","submitted_at":"2016-03-02T15:28:25Z","abstract_excerpt":"Model-free reinforcement learning has been successfully applied to a range of challenging problems, and has recently been extended to handle large neural network policies and value functions. However, the sample complexity of model-free algorithms, particularly when using high-dimensional function approximators, tends to limit their applicability to physical systems. In this paper, we explore algorithms and representations to reduce the sample complexity of deep reinforcement learning for continuous control tasks. We propose two complementary techniques for improving the efficiency of such alg"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00748","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:19:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FwIk9tPje+lJ/17+W43eZZETK4DOkvZEqnYthH13R4PIxDx+dMC+3fdZpiqDcGDATw3zAojhH3mCZPTRNyoBCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T22:59:47.275723Z"},"content_sha256":"ee424c63148c5e428aa0dd952de2535851b0d8b3a8afea8ddebc9b979c1d49c0","schema_version":"1.0","event_id":"sha256:ee424c63148c5e428aa0dd952de2535851b0d8b3a8afea8ddebc9b979c1d49c0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N/bundle.json","state_url":"https://pith.science/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T22:59:47Z","links":{"resolver":"https://pith.science/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N","bundle":"https://pith.science/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N/bundle.json","state":"https://pith.science/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N/state.json","well_known_bundle":"https://pith.science/.well-known/pith/STRRDPGA6UNJ2ZVKKHZQBZC65N/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:STRRDPGA6UNJ2ZVKKHZQBZC65N","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"499d2d5de62607d2621f2d3becc68a95078e4a2e97619b5b81f95216ad74153c","cross_cats_sorted":["cs.AI","cs.RO","cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T15:28:25Z","title_canon_sha256":"5ab3cd4c04ef93ad1d5683273651b09a42347d8aefcdfc5ae801a656ef944740"},"schema_version":"1.0","source":{"id":"1603.00748","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00748","created_at":"2026-05-18T01:19:43Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00748v1","created_at":"2026-05-18T01:19:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00748","created_at":"2026-05-18T01:19:43Z"},{"alias_kind":"pith_short_12","alias_value":"STRRDPGA6UNJ","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_16","alias_value":"STRRDPGA6UNJ2ZVK","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_8","alias_value":"STRRDPGA","created_at":"2026-05-18T12:30:44Z"}],"graph_snapshots":[{"event_id":"sha256:ee424c63148c5e428aa0dd952de2535851b0d8b3a8afea8ddebc9b979c1d49c0","target":"graph","created_at":"2026-05-18T01:19:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Model-free reinforcement learning has been successfully applied to a range of challenging problems, and has recently been extended to handle large neural network policies and value functions. However, the sample complexity of model-free algorithms, particularly when using high-dimensional function approximators, tends to limit their applicability to physical systems. In this paper, we explore algorithms and representations to reduce the sample complexity of deep reinforcement learning for continuous control tasks. We propose two complementary techniques for improving the efficiency of such alg","authors_text":"Ilya Sutskever, Sergey Levine, Shixiang Gu, Timothy Lillicrap","cross_cats":["cs.AI","cs.RO","cs.SY"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T15:28:25Z","title":"Continuous Deep Q-Learning with Model-based Acceleration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00748","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7ff860c5976fa5b6b03214413d810fd698dae37e7436394bb43ee1f5607c9e17","target":"record","created_at":"2026-05-18T01:19:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"499d2d5de62607d2621f2d3becc68a95078e4a2e97619b5b81f95216ad74153c","cross_cats_sorted":["cs.AI","cs.RO","cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-02T15:28:25Z","title_canon_sha256":"5ab3cd4c04ef93ad1d5683273651b09a42347d8aefcdfc5ae801a656ef944740"},"schema_version":"1.0","source":{"id":"1603.00748","kind":"arxiv","version":1}},"canonical_sha256":"94e311bcc0f51a9d66aa51f300e45eeb5481e40d32283a9b2695723c58aa6218","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"94e311bcc0f51a9d66aa51f300e45eeb5481e40d32283a9b2695723c58aa6218","first_computed_at":"2026-05-18T01:19:43.058337Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:19:43.058337Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"eAB1NPL1FNTTvr3wRVevFoEO2ac1Bkoj+AxLgVZMEybuAI74nRZvzRCZkDfxNA4eersPSgSHCmd7As1gspFhBw==","signature_status":"signed_v1","signed_at":"2026-05-18T01:19:43.058969Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.00748","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7ff860c5976fa5b6b03214413d810fd698dae37e7436394bb43ee1f5607c9e17","sha256:ee424c63148c5e428aa0dd952de2535851b0d8b3a8afea8ddebc9b979c1d49c0"],"state_sha256":"188a4e8811278300ec4f893b8947a868ad1f672b81526d51be63a8257f22e1e4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"egSZxgZq9UDuYalMbSrdFHvsWaKFW3LeO5CdVn+Go+GnnChUDAbv45dKL0Y19YbD64zOBQCX3UdQBUGjcV7wDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T22:59:47.279493Z","bundle_sha256":"0f862c509b613fd58b1eddf30f328bf8463b3f1c6ba2fcd2f9e2983d5ab20d6f"}}