{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:EJJLI4IZPVFRSSDJCCBWV7VZR7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8529c0f2fccfd09503468004c55068c6194fe5717505c79a6d2d7a0bec7a6288","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-02-11T10:27:15Z","title_canon_sha256":"b542f8522a718e3de2040996693c4f16d66fe2a68fbab60d49bc5c2cca35cfc4"},"schema_version":"1.0","source":{"id":"1502.03248","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1502.03248","created_at":"2026-05-18T02:20:39Z"},{"alias_kind":"arxiv_version","alias_value":"1502.03248v2","created_at":"2026-05-18T02:20:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1502.03248","created_at":"2026-05-18T02:20:39Z"},{"alias_kind":"pith_short_12","alias_value":"EJJLI4IZPVFR","created_at":"2026-05-18T12:29:19Z"},{"alias_kind":"pith_short_16","alias_value":"EJJLI4IZPVFRSSDJ","created_at":"2026-05-18T12:29:19Z"},{"alias_kind":"pith_short_8","alias_value":"EJJLI4IZ","created_at":"2026-05-18T12:29:19Z"}],"graph_snapshots":[{"event_id":"sha256:f56195e5cea0b92cec591f3a3362ac9318ad29c8cf46ceae3d9054b0c9fdbb79","target":"graph","created_at":"2026-05-18T02:20:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Potential-based reward shaping (PBRS) is an effective and popular technique to speed up reinforcement learning by leveraging domain knowledge. While PBRS is proven to always preserve optimal policies, its effect on learning speed is determined by the quality of its potential function, which, in turn, depends on both the underlying heuristic and the scale. Knowing which heuristic will prove effective requires testing the options beforehand, and determining the appropriate scale requires tuning, both of which introduce additional sample complexity. We formulate a PBRS framework that reduces lear","authors_text":"Anna Harutyunyan, Ann Nowe, Peter Vrancx, Tim Brys","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-02-11T10:27:15Z","title":"Off-Policy Reward Shaping with Ensembles"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1502.03248","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:79f5c5e86758a34d7055fbee7e2f7abe4be527341d6009142a8707ab298ff982","target":"record","created_at":"2026-05-18T02:20:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8529c0f2fccfd09503468004c55068c6194fe5717505c79a6d2d7a0bec7a6288","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-02-11T10:27:15Z","title_canon_sha256":"b542f8522a718e3de2040996693c4f16d66fe2a68fbab60d49bc5c2cca35cfc4"},"schema_version":"1.0","source":{"id":"1502.03248","kind":"arxiv","version":2}},"canonical_sha256":"2252b471197d4b19486910836afeb98fea577aab93ac729be5e377f149b3c975","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2252b471197d4b19486910836afeb98fea577aab93ac729be5e377f149b3c975","first_computed_at":"2026-05-18T02:20:39.357456Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:20:39.357456Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"rAsx+GlBF/kkxpRwMea9V4oor9Ndh6iVC3rlRnSMwjLcOhYSRumqGbatBI9lhK6BUnKk47xGS66kfwzHDdANBg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:20:39.358081Z","signed_message":"canonical_sha256_bytes"},"source_id":"1502.03248","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:79f5c5e86758a34d7055fbee7e2f7abe4be527341d6009142a8707ab298ff982","sha256:f56195e5cea0b92cec591f3a3362ac9318ad29c8cf46ceae3d9054b0c9fdbb79"],"state_sha256":"366f5419e52356fd1a9d20d47d4ae28f5f0a5e0b518e310ed1251e9a920ed260"}