{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:OGMPHBZEXBZVQ2PBKHCKKFRPSE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2147806537af8f993f2ef8eef17fd912504d5de8cd6ff1adee44dd85116954aa","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","title_canon_sha256":"ef9e8771583056d26b116fc72fb944aa72cf6e476d9f5513bc8b962ee9921c4a"},"schema_version":"1.0","source":{"id":"1811.04272","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04272","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04272v1","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04272","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"pith_short_12","alias_value":"OGMPHBZEXBZV","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_16","alias_value":"OGMPHBZEXBZVQ2PB","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_8","alias_value":"OGMPHBZE","created_at":"2026-05-18T12:32:43Z"}],"graph_snapshots":[{"event_id":"sha256:a8e54f88f87ca55382e757c6eddb3101c32251f189bc37dc2fb7e6bfe52d3976","target":"graph","created_at":"2026-05-18T00:01:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Providing reinforcement learning agents with informationally rich human knowledge can dramatically improve various aspects of learning. Prior work has developed different kinds of shaping methods that enable agents to learn efficiently in complex environments. All these methods, however, tailor human guidance to agents in specialized shaping procedures, thus embodying various characteristics and advantages in different domains. In this paper, we investigate the interplay between different shaping methods for more robust learning performance. We propose an adaptive shaping algorithm which is ca","authors_text":"Chao Yu, Dongxu Wang, Guangliang Li, Tianpei Yang, Wenxuan Zhu","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","title":"Learning Shaping Strategies in Human-in-the-loop Interactive Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04272","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0c881b21f3b8fb4cf100eb8a96585ba4529e6d368abf2a013384d9d30ac5af5a","target":"record","created_at":"2026-05-18T00:01:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2147806537af8f993f2ef8eef17fd912504d5de8cd6ff1adee44dd85116954aa","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","title_canon_sha256":"ef9e8771583056d26b116fc72fb944aa72cf6e476d9f5513bc8b962ee9921c4a"},"schema_version":"1.0","source":{"id":"1811.04272","kind":"arxiv","version":1}},"canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","first_computed_at":"2026-05-18T00:01:05.782928Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:05.782928Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OB3AM1ZD3KaKl0O0AHeE1xGR2VprClwoEtcBvY8TTBo6L5Q1T3cBnmmyMLcElv3aTuC/+pBA9pJIuHZU9jtJAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:05.783546Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.04272","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0c881b21f3b8fb4cf100eb8a96585ba4529e6d368abf2a013384d9d30ac5af5a","sha256:a8e54f88f87ca55382e757c6eddb3101c32251f189bc37dc2fb7e6bfe52d3976"],"state_sha256":"9b70feebe302c6eb653894b079e5d754f6a5e32c93e1df208863c1f886bd3f91"}