{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:PGCH6K75GESQM6UKCIFWYWZ6PR","short_pith_number":"pith:PGCH6K75","canonical_record":{"source":{"id":"1904.08621","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-18T07:45:36Z","cross_cats_sorted":["cs.HC","cs.LG"],"title_canon_sha256":"db61b4bec26dfa7cb56d3e0bd40c42af5c04aba63a5cfed79b8f0b4222146edc","abstract_canon_sha256":"0b3845a54cfdf84a81d20e283790ba39a27d6f89b4853e17e976ceab08e3af05"},"schema_version":"1.0"},"canonical_sha256":"79847f2bfd3125067a8a120b6c5b3e7c5484d3e2f33a4dff3773a97f09feea5d","source":{"kind":"arxiv","id":"1904.08621","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.08621","created_at":"2026-05-17T23:48:13Z"},{"alias_kind":"arxiv_version","alias_value":"1904.08621v1","created_at":"2026-05-17T23:48:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.08621","created_at":"2026-05-17T23:48:13Z"},{"alias_kind":"pith_short_12","alias_value":"PGCH6K75GESQ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PGCH6K75GESQM6UK","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PGCH6K75","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:PGCH6K75GESQM6UKCIFWYWZ6PR","target":"record","payload":{"canonical_record":{"source":{"id":"1904.08621","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-18T07:45:36Z","cross_cats_sorted":["cs.HC","cs.LG"],"title_canon_sha256":"db61b4bec26dfa7cb56d3e0bd40c42af5c04aba63a5cfed79b8f0b4222146edc","abstract_canon_sha256":"0b3845a54cfdf84a81d20e283790ba39a27d6f89b4853e17e976ceab08e3af05"},"schema_version":"1.0"},"canonical_sha256":"79847f2bfd3125067a8a120b6c5b3e7c5484d3e2f33a4dff3773a97f09feea5d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:13.679931Z","signature_b64":"gwxMFgHBSNuNnE28T3loHZwjnj7MtuXxWxGHtdJKuRI/ur+cCopmFqjL9eVUbxQFSBOSW8NJP9yHSzYqy+6VDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"79847f2bfd3125067a8a120b6c5b3e7c5484d3e2f33a4dff3773a97f09feea5d","last_reissued_at":"2026-05-17T23:48:13.679327Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:13.679327Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.08621","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"z44pbVo1bOiJJUnvzFd9O+A8tuGqA4QXBgNdLKPAZdJbiFY5Ir7MeJjTOcn/nw5L7I3EG1pdxXVSXJSmAkXAAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T16:24:13.659668Z"},"content_sha256":"6f4edbce06286af6c87a6660f6fe2473ba684dc0fc3dec90fea7db8775b7dc7e","schema_version":"1.0","event_id":"sha256:6f4edbce06286af6c87a6660f6fe2473ba684dc0fc3dec90fea7db8775b7dc7e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:PGCH6K75GESQM6UKCIFWYWZ6PR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improving Interactive Reinforcement Agent Planning with Human Demonstration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.HC","cs.LG"],"primary_cat":"cs.AI","authors_text":"Bo He, Guangliang Li, Jinying Lin, Keisuke Nakamura, Qilei Zhang, Randy Gomez","submitted_at":"2019-04-18T07:45:36Z","abstract_excerpt":"TAMER has proven to be a powerful interactive reinforcement learning method for allowing ordinary people to teach and personalize autonomous agents' behavior by providing evaluative feedback. However, a TAMER agent planning with UCT---a Monte Carlo Tree Search strategy, can only update states along its path and might induce high learning cost especially for a physical robot. In this paper, we propose to drive the agent's exploration along the optimal path and reduce the learning cost by initializing the agent's reward function via inverse reinforcement learning from demonstration. We test our "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.08621","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vZuM9ApFDf+QvS0OwgFqWDxlqNNdW/y+t7+sd5IxOwxOp7LAePKVUmYlsBUKugx98wCQ6kvX6BZjBaMrUTG1DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T16:24:13.660018Z"},"content_sha256":"ce0e90adddcacf79310c4ae1a9155fc0fa4a569cbbbacaf7b4cac9def39189bd","schema_version":"1.0","event_id":"sha256:ce0e90adddcacf79310c4ae1a9155fc0fa4a569cbbbacaf7b4cac9def39189bd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PGCH6K75GESQM6UKCIFWYWZ6PR/bundle.json","state_url":"https://pith.science/pith/PGCH6K75GESQM6UKCIFWYWZ6PR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PGCH6K75GESQM6UKCIFWYWZ6PR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T16:24:13Z","links":{"resolver":"https://pith.science/pith/PGCH6K75GESQM6UKCIFWYWZ6PR","bundle":"https://pith.science/pith/PGCH6K75GESQM6UKCIFWYWZ6PR/bundle.json","state":"https://pith.science/pith/PGCH6K75GESQM6UKCIFWYWZ6PR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PGCH6K75GESQM6UKCIFWYWZ6PR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:PGCH6K75GESQM6UKCIFWYWZ6PR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0b3845a54cfdf84a81d20e283790ba39a27d6f89b4853e17e976ceab08e3af05","cross_cats_sorted":["cs.HC","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-18T07:45:36Z","title_canon_sha256":"db61b4bec26dfa7cb56d3e0bd40c42af5c04aba63a5cfed79b8f0b4222146edc"},"schema_version":"1.0","source":{"id":"1904.08621","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.08621","created_at":"2026-05-17T23:48:13Z"},{"alias_kind":"arxiv_version","alias_value":"1904.08621v1","created_at":"2026-05-17T23:48:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.08621","created_at":"2026-05-17T23:48:13Z"},{"alias_kind":"pith_short_12","alias_value":"PGCH6K75GESQ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PGCH6K75GESQM6UK","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PGCH6K75","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:ce0e90adddcacf79310c4ae1a9155fc0fa4a569cbbbacaf7b4cac9def39189bd","target":"graph","created_at":"2026-05-17T23:48:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"TAMER has proven to be a powerful interactive reinforcement learning method for allowing ordinary people to teach and personalize autonomous agents' behavior by providing evaluative feedback. However, a TAMER agent planning with UCT---a Monte Carlo Tree Search strategy, can only update states along its path and might induce high learning cost especially for a physical robot. In this paper, we propose to drive the agent's exploration along the optimal path and reduce the learning cost by initializing the agent's reward function via inverse reinforcement learning from demonstration. We test our ","authors_text":"Bo He, Guangliang Li, Jinying Lin, Keisuke Nakamura, Qilei Zhang, Randy Gomez","cross_cats":["cs.HC","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-18T07:45:36Z","title":"Improving Interactive Reinforcement Agent Planning with Human Demonstration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.08621","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6f4edbce06286af6c87a6660f6fe2473ba684dc0fc3dec90fea7db8775b7dc7e","target":"record","created_at":"2026-05-17T23:48:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0b3845a54cfdf84a81d20e283790ba39a27d6f89b4853e17e976ceab08e3af05","cross_cats_sorted":["cs.HC","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-18T07:45:36Z","title_canon_sha256":"db61b4bec26dfa7cb56d3e0bd40c42af5c04aba63a5cfed79b8f0b4222146edc"},"schema_version":"1.0","source":{"id":"1904.08621","kind":"arxiv","version":1}},"canonical_sha256":"79847f2bfd3125067a8a120b6c5b3e7c5484d3e2f33a4dff3773a97f09feea5d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"79847f2bfd3125067a8a120b6c5b3e7c5484d3e2f33a4dff3773a97f09feea5d","first_computed_at":"2026-05-17T23:48:13.679327Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:48:13.679327Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gwxMFgHBSNuNnE28T3loHZwjnj7MtuXxWxGHtdJKuRI/ur+cCopmFqjL9eVUbxQFSBOSW8NJP9yHSzYqy+6VDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:48:13.679931Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.08621","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6f4edbce06286af6c87a6660f6fe2473ba684dc0fc3dec90fea7db8775b7dc7e","sha256:ce0e90adddcacf79310c4ae1a9155fc0fa4a569cbbbacaf7b4cac9def39189bd"],"state_sha256":"b0792fba05673189f4ca003d18c5a15c470d2760727db2ed5592c962926c0564"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"x3lyOp3UT39T20IGhZ4CVWak0x3IrgGlo0pzotSvg2Av+JaT81tsQa+NT/HSKxpiOhQ6vn7KlFXn72drj0QGAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T16:24:13.662075Z","bundle_sha256":"d18bafefc03bf15cc2f53a6fd0d8ce7f9d41e9060372308b7c6dc2a787cff027"}}