{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:IVL227MK36UFQX2YHBWISEOATN","short_pith_number":"pith:IVL227MK","canonical_record":{"source":{"id":"1709.04909","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-09-14T17:54:05Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"31e9085088295821a47d13b2341e90d3a39c4570f69e05a59cfadc1430d02cb5","abstract_canon_sha256":"b65efba3b6a23380073f4f583f2fe5eac4744eb74a9a1f14541270b56acac7e7"},"schema_version":"1.0"},"canonical_sha256":"4557ad7d8adfa8585f58386c8911c09b7aadd7fc1781d96d12f273c9dcbb3e24","source":{"kind":"arxiv","id":"1709.04909","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.04909","created_at":"2026-05-18T00:35:09Z"},{"alias_kind":"arxiv_version","alias_value":"1709.04909v1","created_at":"2026-05-18T00:35:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.04909","created_at":"2026-05-18T00:35:09Z"},{"alias_kind":"pith_short_12","alias_value":"IVL227MK36UF","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_16","alias_value":"IVL227MK36UFQX2Y","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_8","alias_value":"IVL227MK","created_at":"2026-05-18T12:31:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:IVL227MK36UFQX2YHBWISEOATN","target":"record","payload":{"canonical_record":{"source":{"id":"1709.04909","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-09-14T17:54:05Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"31e9085088295821a47d13b2341e90d3a39c4570f69e05a59cfadc1430d02cb5","abstract_canon_sha256":"b65efba3b6a23380073f4f583f2fe5eac4744eb74a9a1f14541270b56acac7e7"},"schema_version":"1.0"},"canonical_sha256":"4557ad7d8adfa8585f58386c8911c09b7aadd7fc1781d96d12f273c9dcbb3e24","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:35:09.767499Z","signature_b64":"i6patLZKlwpPTtcHxw7CUtb72hEMBtA2csmsIiPPsyNH/sJ7AVsjL/qkJgOpjmoNEZLYMMJtmdIdYgUjspKWDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4557ad7d8adfa8585f58386c8911c09b7aadd7fc1781d96d12f273c9dcbb3e24","last_reissued_at":"2026-05-18T00:35:09.767092Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:35:09.767092Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.04909","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:35:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ordFLs1Z2UPbRGF204GBUPixnZryFuEmOwn0TPguC6K4kc0UahFlGMH8Fy9/uPAjq3Bk/J0dTmKXkmKPy6L/Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T19:13:31.479491Z"},"content_sha256":"9ff4d37ffe6049a3f04f462be8d3ac237375454daa417e9603741aa68c9bbb97","schema_version":"1.0","event_id":"sha256:9ff4d37ffe6049a3f04f462be8d3ac237375454daa417e9603741aa68c9bbb97"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:IVL227MK36UFQX2YHBWISEOATN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Shared Learning : Enhancing Reinforcement in $Q$-Ensembles","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Balaraman Ravindran, Rakesh R Menon","submitted_at":"2017-09-14T17:54:05Z","abstract_excerpt":"Deep Reinforcement Learning has been able to achieve amazing successes in a variety of domains from video games to continuous control by trying to maximize the cumulative reward. However, most of these successes rely on algorithms that require a large amount of data to train in order to obtain results on par with human-level performance. This is not feasible if we are to deploy these systems on real world tasks and hence there has been an increased thrust in exploring data efficient algorithms. To this end, we propose the Shared Learning framework aimed at making $Q$-ensemble algorithms data-e"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.04909","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:35:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NWdFNUbRRlBzfk8VVFbwfiZ5kP0UYM0iRJuC15jrojMmsEdD6E9WNT0VNTn6fxLmcqZgpgdYVuqzvrLqoBv6Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T19:13:31.479835Z"},"content_sha256":"a36e61e1fca764c6db52bd7f7dc74c093655bb47c2cd1097c449c82d6295f5ef","schema_version":"1.0","event_id":"sha256:a36e61e1fca764c6db52bd7f7dc74c093655bb47c2cd1097c449c82d6295f5ef"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IVL227MK36UFQX2YHBWISEOATN/bundle.json","state_url":"https://pith.science/pith/IVL227MK36UFQX2YHBWISEOATN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IVL227MK36UFQX2YHBWISEOATN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T19:13:31Z","links":{"resolver":"https://pith.science/pith/IVL227MK36UFQX2YHBWISEOATN","bundle":"https://pith.science/pith/IVL227MK36UFQX2YHBWISEOATN/bundle.json","state":"https://pith.science/pith/IVL227MK36UFQX2YHBWISEOATN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IVL227MK36UFQX2YHBWISEOATN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:IVL227MK36UFQX2YHBWISEOATN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b65efba3b6a23380073f4f583f2fe5eac4744eb74a9a1f14541270b56acac7e7","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-09-14T17:54:05Z","title_canon_sha256":"31e9085088295821a47d13b2341e90d3a39c4570f69e05a59cfadc1430d02cb5"},"schema_version":"1.0","source":{"id":"1709.04909","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.04909","created_at":"2026-05-18T00:35:09Z"},{"alias_kind":"arxiv_version","alias_value":"1709.04909v1","created_at":"2026-05-18T00:35:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.04909","created_at":"2026-05-18T00:35:09Z"},{"alias_kind":"pith_short_12","alias_value":"IVL227MK36UF","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_16","alias_value":"IVL227MK36UFQX2Y","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_8","alias_value":"IVL227MK","created_at":"2026-05-18T12:31:21Z"}],"graph_snapshots":[{"event_id":"sha256:a36e61e1fca764c6db52bd7f7dc74c093655bb47c2cd1097c449c82d6295f5ef","target":"graph","created_at":"2026-05-18T00:35:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep Reinforcement Learning has been able to achieve amazing successes in a variety of domains from video games to continuous control by trying to maximize the cumulative reward. However, most of these successes rely on algorithms that require a large amount of data to train in order to obtain results on par with human-level performance. This is not feasible if we are to deploy these systems on real world tasks and hence there has been an increased thrust in exploring data efficient algorithms. To this end, we propose the Shared Learning framework aimed at making $Q$-ensemble algorithms data-e","authors_text":"Balaraman Ravindran, Rakesh R Menon","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-09-14T17:54:05Z","title":"Shared Learning : Enhancing Reinforcement in $Q$-Ensembles"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.04909","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9ff4d37ffe6049a3f04f462be8d3ac237375454daa417e9603741aa68c9bbb97","target":"record","created_at":"2026-05-18T00:35:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b65efba3b6a23380073f4f583f2fe5eac4744eb74a9a1f14541270b56acac7e7","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-09-14T17:54:05Z","title_canon_sha256":"31e9085088295821a47d13b2341e90d3a39c4570f69e05a59cfadc1430d02cb5"},"schema_version":"1.0","source":{"id":"1709.04909","kind":"arxiv","version":1}},"canonical_sha256":"4557ad7d8adfa8585f58386c8911c09b7aadd7fc1781d96d12f273c9dcbb3e24","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4557ad7d8adfa8585f58386c8911c09b7aadd7fc1781d96d12f273c9dcbb3e24","first_computed_at":"2026-05-18T00:35:09.767092Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:35:09.767092Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"i6patLZKlwpPTtcHxw7CUtb72hEMBtA2csmsIiPPsyNH/sJ7AVsjL/qkJgOpjmoNEZLYMMJtmdIdYgUjspKWDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:35:09.767499Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.04909","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9ff4d37ffe6049a3f04f462be8d3ac237375454daa417e9603741aa68c9bbb97","sha256:a36e61e1fca764c6db52bd7f7dc74c093655bb47c2cd1097c449c82d6295f5ef"],"state_sha256":"c17e4ee2a3194c3da13ad162afee3e2468001e449ace7fe88650e71f4f04a1ee"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rg8gKrTvBDrP0qv72YUo0gqpmFXQUcqPiZX0F1MWTAJhWw/2VDKEjZHRNPz9ZK2bDbAn3ro78ayDJJs8Va28Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T19:13:31.481796Z","bundle_sha256":"9d303237f44dafd957e577901b1fdbbbd584246b00c25d7a78396f0499887d3d"}}