{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:UQ3FC4NUIJD6INORST5MM7L3M3","short_pith_number":"pith:UQ3FC4NU","canonical_record":{"source":{"id":"1704.00756","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-04-03T18:37:12Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"f37ca31378ebd1c182da01683756d1ef934c0e2b7f895a94d1f16d5dea045f93","abstract_canon_sha256":"07dfd51463b0df0de195ce92222065332da61a27fbe06400f4fa5356d4e09dd4"},"schema_version":"1.0"},"canonical_sha256":"a4365171b44247e435d194fac67d7b66c1d870cce17db734aa1b070ba13a3738","source":{"kind":"arxiv","id":"1704.00756","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.00756","created_at":"2026-05-18T00:30:34Z"},{"alias_kind":"arxiv_version","alias_value":"1704.00756v2","created_at":"2026-05-18T00:30:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.00756","created_at":"2026-05-18T00:30:34Z"},{"alias_kind":"pith_short_12","alias_value":"UQ3FC4NUIJD6","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_16","alias_value":"UQ3FC4NUIJD6INOR","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_8","alias_value":"UQ3FC4NU","created_at":"2026-05-18T12:31:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:UQ3FC4NUIJD6INORST5MM7L3M3","target":"record","payload":{"canonical_record":{"source":{"id":"1704.00756","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-04-03T18:37:12Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"f37ca31378ebd1c182da01683756d1ef934c0e2b7f895a94d1f16d5dea045f93","abstract_canon_sha256":"07dfd51463b0df0de195ce92222065332da61a27fbe06400f4fa5356d4e09dd4"},"schema_version":"1.0"},"canonical_sha256":"a4365171b44247e435d194fac67d7b66c1d870cce17db734aa1b070ba13a3738","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:30:34.177303Z","signature_b64":"JHWMDYDU1zvfh9e7Ifu9xrNQr/ZZ5lMpkTZzH33XsuhvK9lLr32MFms0XqB27hUrB2JvVxp4jpXVZJ00R7q5Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a4365171b44247e435d194fac67d7b66c1d870cce17db734aa1b070ba13a3738","last_reissued_at":"2026-05-18T00:30:34.176575Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:30:34.176575Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1704.00756","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:30:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"quHkgCoDEl+TlNaRymyTvBrs4yglEsvkcQZ4mbRJps5pfwDEHGWjNDHzY9XgbS4jUqqb0gF78tAuEsCPH5cyDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T22:14:46.724198Z"},"content_sha256":"cee4bb220191f47c12959fba48ffa385d53656a3db42839d8b3bd90af7c22446","schema_version":"1.0","event_id":"sha256:cee4bb220191f47c12959fba48ffa385d53656a3db42839d8b3bd90af7c22446"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:UQ3FC4NUIJD6INORST5MM7L3M3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-Advisor Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Harm van Seijen, Joshua Romoff, Mehdi Fatemi, Romain Laroche","submitted_at":"2017-04-03T18:37:12Z","abstract_excerpt":"We consider tackling a single-agent RL problem by distributing it to $n$ learners. These learners, called advisors, endeavour to solve the problem from a different focus. Their advice, taking the form of action values, is then communicated to an aggregator, which is in control of the system. We show that the local planning method for the advisors is critical and that none of the ones found in the literature is flawless: the egocentric planning overestimates values of states where the other advisors disagree, and the agnostic planning is inefficient around danger zones. We introduce a novel app"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.00756","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:30:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T6EZzOGVbQuNwbByVMT819zVKR/1TR91W5ZHdIK4HTlytmJhNsjY/cMnANZiXwUqcnRYnow62ZLpmeh6HPB+AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T22:14:46.724869Z"},"content_sha256":"6d02d35f0afbc5eeb9c72d002ae55c9410fc0e11fa07c1a023d2773b662dba38","schema_version":"1.0","event_id":"sha256:6d02d35f0afbc5eeb9c72d002ae55c9410fc0e11fa07c1a023d2773b662dba38"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UQ3FC4NUIJD6INORST5MM7L3M3/bundle.json","state_url":"https://pith.science/pith/UQ3FC4NUIJD6INORST5MM7L3M3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UQ3FC4NUIJD6INORST5MM7L3M3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T22:14:46Z","links":{"resolver":"https://pith.science/pith/UQ3FC4NUIJD6INORST5MM7L3M3","bundle":"https://pith.science/pith/UQ3FC4NUIJD6INORST5MM7L3M3/bundle.json","state":"https://pith.science/pith/UQ3FC4NUIJD6INORST5MM7L3M3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UQ3FC4NUIJD6INORST5MM7L3M3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:UQ3FC4NUIJD6INORST5MM7L3M3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"07dfd51463b0df0de195ce92222065332da61a27fbe06400f4fa5356d4e09dd4","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-04-03T18:37:12Z","title_canon_sha256":"f37ca31378ebd1c182da01683756d1ef934c0e2b7f895a94d1f16d5dea045f93"},"schema_version":"1.0","source":{"id":"1704.00756","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.00756","created_at":"2026-05-18T00:30:34Z"},{"alias_kind":"arxiv_version","alias_value":"1704.00756v2","created_at":"2026-05-18T00:30:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.00756","created_at":"2026-05-18T00:30:34Z"},{"alias_kind":"pith_short_12","alias_value":"UQ3FC4NUIJD6","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_16","alias_value":"UQ3FC4NUIJD6INOR","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_8","alias_value":"UQ3FC4NU","created_at":"2026-05-18T12:31:46Z"}],"graph_snapshots":[{"event_id":"sha256:6d02d35f0afbc5eeb9c72d002ae55c9410fc0e11fa07c1a023d2773b662dba38","target":"graph","created_at":"2026-05-18T00:30:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider tackling a single-agent RL problem by distributing it to $n$ learners. These learners, called advisors, endeavour to solve the problem from a different focus. Their advice, taking the form of action values, is then communicated to an aggregator, which is in control of the system. We show that the local planning method for the advisors is critical and that none of the ones found in the literature is flawless: the egocentric planning overestimates values of states where the other advisors disagree, and the agnostic planning is inefficient around danger zones. We introduce a novel app","authors_text":"Harm van Seijen, Joshua Romoff, Mehdi Fatemi, Romain Laroche","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-04-03T18:37:12Z","title":"Multi-Advisor Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.00756","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cee4bb220191f47c12959fba48ffa385d53656a3db42839d8b3bd90af7c22446","target":"record","created_at":"2026-05-18T00:30:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"07dfd51463b0df0de195ce92222065332da61a27fbe06400f4fa5356d4e09dd4","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-04-03T18:37:12Z","title_canon_sha256":"f37ca31378ebd1c182da01683756d1ef934c0e2b7f895a94d1f16d5dea045f93"},"schema_version":"1.0","source":{"id":"1704.00756","kind":"arxiv","version":2}},"canonical_sha256":"a4365171b44247e435d194fac67d7b66c1d870cce17db734aa1b070ba13a3738","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a4365171b44247e435d194fac67d7b66c1d870cce17db734aa1b070ba13a3738","first_computed_at":"2026-05-18T00:30:34.176575Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:30:34.176575Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JHWMDYDU1zvfh9e7Ifu9xrNQr/ZZ5lMpkTZzH33XsuhvK9lLr32MFms0XqB27hUrB2JvVxp4jpXVZJ00R7q5Dg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:30:34.177303Z","signed_message":"canonical_sha256_bytes"},"source_id":"1704.00756","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cee4bb220191f47c12959fba48ffa385d53656a3db42839d8b3bd90af7c22446","sha256:6d02d35f0afbc5eeb9c72d002ae55c9410fc0e11fa07c1a023d2773b662dba38"],"state_sha256":"0ac1f78fe4558906ece95d37692c552eae1feb25ef142decbbd583ab52a76325"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AslSCwq6FD70x+T2YUGisYPOfoJLcS7atlhafGo6hre/S7SwcDAOZFck+lF4QBm2vXdEkXnWsJQxakXU9myDAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T22:14:46.728342Z","bundle_sha256":"df0c5381884317950727771ac8ef55706b6da897739ba359cf96d437255ff6d0"}}