{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:HPVOUD3QPGRJYG4R5R6AYOALWZ","short_pith_number":"pith:HPVOUD3Q","canonical_record":{"source":{"id":"1603.04119","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-03-14T03:16:25Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"89b57b0debf501416f83223988ef51674e401f0a1c2a6abcf06955dc8d1b5ec9","abstract_canon_sha256":"d1c452a9e1c4541abfbda27e351e5324acaa08a67d5234137b9dc0bf415474a7"},"schema_version":"1.0"},"canonical_sha256":"3beaea0f7079a29c1b91ec7c0c380bb65e3d29324ac6af43ae54bbdb4f67fc19","source":{"kind":"arxiv","id":"1603.04119","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.04119","created_at":"2026-05-18T01:19:09Z"},{"alias_kind":"arxiv_version","alias_value":"1603.04119v1","created_at":"2026-05-18T01:19:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.04119","created_at":"2026-05-18T01:19:09Z"},{"alias_kind":"pith_short_12","alias_value":"HPVOUD3QPGRJ","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"HPVOUD3QPGRJYG4R","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"HPVOUD3Q","created_at":"2026-05-18T12:30:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:HPVOUD3QPGRJYG4R5R6AYOALWZ","target":"record","payload":{"canonical_record":{"source":{"id":"1603.04119","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-03-14T03:16:25Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"89b57b0debf501416f83223988ef51674e401f0a1c2a6abcf06955dc8d1b5ec9","abstract_canon_sha256":"d1c452a9e1c4541abfbda27e351e5324acaa08a67d5234137b9dc0bf415474a7"},"schema_version":"1.0"},"canonical_sha256":"3beaea0f7079a29c1b91ec7c0c380bb65e3d29324ac6af43ae54bbdb4f67fc19","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:19:09.368907Z","signature_b64":"3r71BLalFk7vEIPUBae7doTK2/vn8r6AnaAwFBpXYJfvPL6j3GtUiySPG++FEeLRoOzJHMgiF67zCft6OOC3CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3beaea0f7079a29c1b91ec7c0c380bb65e3d29324ac6af43ae54bbdb4f67fc19","last_reissued_at":"2026-05-18T01:19:09.368024Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:19:09.368024Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1603.04119","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:19:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sPjEgB8I9Fcu5UNmhnYc5+pdMmt7+dlBzXh8MXH6LenD3eVu8jWzqej0lL91kU/IBeNFhnsaK3rkxaxfcZzUBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T20:44:51.815251Z"},"content_sha256":"bfad7a924595676fc6dbbb8b6822d912714aa2ba7904c9e5e85b40f370d95cb7","schema_version":"1.0","event_id":"sha256:bfad7a924595676fc6dbbb8b6822d912714aa2ba7904c9e5e85b40f370d95cb7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:HPVOUD3QPGRJYG4R5R6AYOALWZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Exploratory Gradient Boosting for Reinforcement Learning in Complex Domains","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.AI","authors_text":"Akshay Krishnamurthy, Alekh Agarwal, David Abel, Fernando Diaz, Robert E. Schapire","submitted_at":"2016-03-14T03:16:25Z","abstract_excerpt":"High-dimensional observations and complex real-world dynamics present major challenges in reinforcement learning for both function approximation and exploration. We address both of these challenges with two complementary techniques: First, we develop a gradient-boosting style, non-parametric function approximator for learning on $Q$-function residuals. And second, we propose an exploration strategy inspired by the principles of state abstraction and information acquisition under uncertainty. We demonstrate the empirical effectiveness of these techniques, first, as a preliminary check, on two s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.04119","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:19:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QTZAnULiUlSFF4H4UAv6R3+9iTVeFqpeKnKP4q9FdaoWm3+H/qpYOvxMkztbijG57oUFw6CIunGjVElG/TCbCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T20:44:51.815968Z"},"content_sha256":"1a09edf679f959f154d70b087c0cb2b9e26b439798f307028c54b4101d333a68","schema_version":"1.0","event_id":"sha256:1a09edf679f959f154d70b087c0cb2b9e26b439798f307028c54b4101d333a68"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ/bundle.json","state_url":"https://pith.science/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T20:44:51Z","links":{"resolver":"https://pith.science/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ","bundle":"https://pith.science/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ/bundle.json","state":"https://pith.science/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HPVOUD3QPGRJYG4R5R6AYOALWZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:HPVOUD3QPGRJYG4R5R6AYOALWZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d1c452a9e1c4541abfbda27e351e5324acaa08a67d5234137b9dc0bf415474a7","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-03-14T03:16:25Z","title_canon_sha256":"89b57b0debf501416f83223988ef51674e401f0a1c2a6abcf06955dc8d1b5ec9"},"schema_version":"1.0","source":{"id":"1603.04119","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.04119","created_at":"2026-05-18T01:19:09Z"},{"alias_kind":"arxiv_version","alias_value":"1603.04119v1","created_at":"2026-05-18T01:19:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.04119","created_at":"2026-05-18T01:19:09Z"},{"alias_kind":"pith_short_12","alias_value":"HPVOUD3QPGRJ","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"HPVOUD3QPGRJYG4R","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"HPVOUD3Q","created_at":"2026-05-18T12:30:19Z"}],"graph_snapshots":[{"event_id":"sha256:1a09edf679f959f154d70b087c0cb2b9e26b439798f307028c54b4101d333a68","target":"graph","created_at":"2026-05-18T01:19:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"High-dimensional observations and complex real-world dynamics present major challenges in reinforcement learning for both function approximation and exploration. We address both of these challenges with two complementary techniques: First, we develop a gradient-boosting style, non-parametric function approximator for learning on $Q$-function residuals. And second, we propose an exploration strategy inspired by the principles of state abstraction and information acquisition under uncertainty. We demonstrate the empirical effectiveness of these techniques, first, as a preliminary check, on two s","authors_text":"Akshay Krishnamurthy, Alekh Agarwal, David Abel, Fernando Diaz, Robert E. Schapire","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-03-14T03:16:25Z","title":"Exploratory Gradient Boosting for Reinforcement Learning in Complex Domains"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.04119","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bfad7a924595676fc6dbbb8b6822d912714aa2ba7904c9e5e85b40f370d95cb7","target":"record","created_at":"2026-05-18T01:19:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d1c452a9e1c4541abfbda27e351e5324acaa08a67d5234137b9dc0bf415474a7","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-03-14T03:16:25Z","title_canon_sha256":"89b57b0debf501416f83223988ef51674e401f0a1c2a6abcf06955dc8d1b5ec9"},"schema_version":"1.0","source":{"id":"1603.04119","kind":"arxiv","version":1}},"canonical_sha256":"3beaea0f7079a29c1b91ec7c0c380bb65e3d29324ac6af43ae54bbdb4f67fc19","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3beaea0f7079a29c1b91ec7c0c380bb65e3d29324ac6af43ae54bbdb4f67fc19","first_computed_at":"2026-05-18T01:19:09.368024Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:19:09.368024Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3r71BLalFk7vEIPUBae7doTK2/vn8r6AnaAwFBpXYJfvPL6j3GtUiySPG++FEeLRoOzJHMgiF67zCft6OOC3CA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:19:09.368907Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.04119","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bfad7a924595676fc6dbbb8b6822d912714aa2ba7904c9e5e85b40f370d95cb7","sha256:1a09edf679f959f154d70b087c0cb2b9e26b439798f307028c54b4101d333a68"],"state_sha256":"068137548e301858366819b261b57bb1c6b78b710eaa4d8a174425bd884f5a9b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fdxbABbSt2ZEK/QhGZwlUh5gW4Cu930tb1EvdNVgZC1wBAUbISxuC2k/mSo3eTwf92d6wNCL6eSgQZwwXUQZAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T20:44:51.820006Z","bundle_sha256":"ef282c010e681f6090d4ffa40c7130eb392a4de2223367e48adfe6a5204da414"}}