{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:QXW5MMLZMRYRLTMQLZ6HXNA5XM","short_pith_number":"pith:QXW5MMLZ","canonical_record":{"source":{"id":"1805.04493","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-05-11T17:12:11Z","cross_cats_sorted":[],"title_canon_sha256":"25bf77ea0b7d6930256d12ebf5864bc5ceec79654f09c88ce2a41b2e1242e91b","abstract_canon_sha256":"0d191c185224725e98aa22c3ce18758f3133f16fc521d9ac4f2892a91618ad76"},"schema_version":"1.0"},"canonical_sha256":"85edd63179647115cd905e7c7bb41dbb3d26114aeae74419189d058546f0aebe","source":{"kind":"arxiv","id":"1805.04493","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.04493","created_at":"2026-05-18T00:16:10Z"},{"alias_kind":"arxiv_version","alias_value":"1805.04493v1","created_at":"2026-05-18T00:16:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.04493","created_at":"2026-05-18T00:16:10Z"},{"alias_kind":"pith_short_12","alias_value":"QXW5MMLZMRYR","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"QXW5MMLZMRYRLTMQ","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"QXW5MMLZ","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:QXW5MMLZMRYRLTMQLZ6HXNA5XM","target":"record","payload":{"canonical_record":{"source":{"id":"1805.04493","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-05-11T17:12:11Z","cross_cats_sorted":[],"title_canon_sha256":"25bf77ea0b7d6930256d12ebf5864bc5ceec79654f09c88ce2a41b2e1242e91b","abstract_canon_sha256":"0d191c185224725e98aa22c3ce18758f3133f16fc521d9ac4f2892a91618ad76"},"schema_version":"1.0"},"canonical_sha256":"85edd63179647115cd905e7c7bb41dbb3d26114aeae74419189d058546f0aebe","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:16:10.137265Z","signature_b64":"12XTTR4Ak63WCgYMB000ElyiTH0ot3Pv+J5iS5jAcbalBJ9HkZvUOaqAm5aWdbxjYwWvQbwb7CulIJwAmRRNBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"85edd63179647115cd905e7c7bb41dbb3d26114aeae74419189d058546f0aebe","last_reissued_at":"2026-05-18T00:16:10.136499Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:16:10.136499Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.04493","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:16:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JTdn5Mw2ZXZuBREWt5e/YA3i2beP4VkC5yxvlCrKUi1TOXAoKrfBmp4x40fDT/og3s79m8BDVQjOU8TZwvAxDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:40:02.988058Z"},"content_sha256":"0dd6de59a3732964576a14ce5eee7f641fc71011bef59501d1221b7a9647c1b7","schema_version":"1.0","event_id":"sha256:0dd6de59a3732964576a14ce5eee7f641fc71011bef59501d1221b7a9647c1b7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:QXW5MMLZMRYRLTMQLZ6HXNA5XM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Interactive Reinforcement Learning with Dynamic Reuse of Prior Knowledge from Human/Agent's Demonstration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Matthew E. Taylor, Zhaodong Wang","submitted_at":"2018-05-11T17:12:11Z","abstract_excerpt":"Reinforcement learning has enjoyed multiple successes in recent years. However, these successes typically require very large amounts of data before an agent achieves acceptable performance. This paper introduces a novel way of combating such requirements by leveraging existing (human or agent) knowledge. In particular, this paper uses demonstrations from agents and humans, allowing an untrained agent to quickly achieve high performance. We empirically compare with, and highlight the weakness of, HAT and CHAT, methods of transferring knowledge from a source agent/human to a target agent. This p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.04493","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:16:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QTM5k7JHswFbwK2Q+fanOJMWv1NJXZJkQZ8U3PxZRiRwQsfrTlWvR/qbYAyLorHjbuoo7q5Wsp/yxB4bphulBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:40:02.988424Z"},"content_sha256":"c70a87114e30d14c907257bc5adb471713fe889025e92bfb6494b3c1bcc5cf1f","schema_version":"1.0","event_id":"sha256:c70a87114e30d14c907257bc5adb471713fe889025e92bfb6494b3c1bcc5cf1f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM/bundle.json","state_url":"https://pith.science/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T08:40:02Z","links":{"resolver":"https://pith.science/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM","bundle":"https://pith.science/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM/bundle.json","state":"https://pith.science/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QXW5MMLZMRYRLTMQLZ6HXNA5XM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:QXW5MMLZMRYRLTMQLZ6HXNA5XM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0d191c185224725e98aa22c3ce18758f3133f16fc521d9ac4f2892a91618ad76","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-05-11T17:12:11Z","title_canon_sha256":"25bf77ea0b7d6930256d12ebf5864bc5ceec79654f09c88ce2a41b2e1242e91b"},"schema_version":"1.0","source":{"id":"1805.04493","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.04493","created_at":"2026-05-18T00:16:10Z"},{"alias_kind":"arxiv_version","alias_value":"1805.04493v1","created_at":"2026-05-18T00:16:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.04493","created_at":"2026-05-18T00:16:10Z"},{"alias_kind":"pith_short_12","alias_value":"QXW5MMLZMRYR","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"QXW5MMLZMRYRLTMQ","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"QXW5MMLZ","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:c70a87114e30d14c907257bc5adb471713fe889025e92bfb6494b3c1bcc5cf1f","target":"graph","created_at":"2026-05-18T00:16:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning has enjoyed multiple successes in recent years. However, these successes typically require very large amounts of data before an agent achieves acceptable performance. This paper introduces a novel way of combating such requirements by leveraging existing (human or agent) knowledge. In particular, this paper uses demonstrations from agents and humans, allowing an untrained agent to quickly achieve high performance. We empirically compare with, and highlight the weakness of, HAT and CHAT, methods of transferring knowledge from a source agent/human to a target agent. This p","authors_text":"Matthew E. Taylor, Zhaodong Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-05-11T17:12:11Z","title":"Interactive Reinforcement Learning with Dynamic Reuse of Prior Knowledge from Human/Agent's Demonstration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.04493","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0dd6de59a3732964576a14ce5eee7f641fc71011bef59501d1221b7a9647c1b7","target":"record","created_at":"2026-05-18T00:16:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0d191c185224725e98aa22c3ce18758f3133f16fc521d9ac4f2892a91618ad76","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-05-11T17:12:11Z","title_canon_sha256":"25bf77ea0b7d6930256d12ebf5864bc5ceec79654f09c88ce2a41b2e1242e91b"},"schema_version":"1.0","source":{"id":"1805.04493","kind":"arxiv","version":1}},"canonical_sha256":"85edd63179647115cd905e7c7bb41dbb3d26114aeae74419189d058546f0aebe","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"85edd63179647115cd905e7c7bb41dbb3d26114aeae74419189d058546f0aebe","first_computed_at":"2026-05-18T00:16:10.136499Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:16:10.136499Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"12XTTR4Ak63WCgYMB000ElyiTH0ot3Pv+J5iS5jAcbalBJ9HkZvUOaqAm5aWdbxjYwWvQbwb7CulIJwAmRRNBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:16:10.137265Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.04493","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0dd6de59a3732964576a14ce5eee7f641fc71011bef59501d1221b7a9647c1b7","sha256:c70a87114e30d14c907257bc5adb471713fe889025e92bfb6494b3c1bcc5cf1f"],"state_sha256":"96e008c816ca199a7d91dd904732dbc30a58d79c94a77259b1444f383a6fe7c4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/IO0WWCTGhHCUb9fiyQmQEIOfP+97+LDvrAJbJuNvqIBNMQL1prN/2HN/D8T62WP9rh7tOAcsr01sMZA0RFHAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T08:40:02.993801Z","bundle_sha256":"846ad2ffd6390ba9a412372b11662307123fa5d747ca4a8d574ad9f672f34d15"}}