{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:OGMPHBZEXBZVQ2PBKHCKKFRPSE","short_pith_number":"pith:OGMPHBZE","canonical_record":{"source":{"id":"1811.04272","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"ef9e8771583056d26b116fc72fb944aa72cf6e476d9f5513bc8b962ee9921c4a","abstract_canon_sha256":"2147806537af8f993f2ef8eef17fd912504d5de8cd6ff1adee44dd85116954aa"},"schema_version":"1.0"},"canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","source":{"kind":"arxiv","id":"1811.04272","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04272","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04272v1","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04272","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"pith_short_12","alias_value":"OGMPHBZEXBZV","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_16","alias_value":"OGMPHBZEXBZVQ2PB","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_8","alias_value":"OGMPHBZE","created_at":"2026-05-18T12:32:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:OGMPHBZEXBZVQ2PBKHCKKFRPSE","target":"record","payload":{"canonical_record":{"source":{"id":"1811.04272","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"ef9e8771583056d26b116fc72fb944aa72cf6e476d9f5513bc8b962ee9921c4a","abstract_canon_sha256":"2147806537af8f993f2ef8eef17fd912504d5de8cd6ff1adee44dd85116954aa"},"schema_version":"1.0"},"canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:05.783546Z","signature_b64":"OB3AM1ZD3KaKl0O0AHeE1xGR2VprClwoEtcBvY8TTBo6L5Q1T3cBnmmyMLcElv3aTuC/+pBA9pJIuHZU9jtJAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","last_reissued_at":"2026-05-18T00:01:05.782928Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:05.782928Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.04272","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OG7i+YdCBzn17WePbqr4zfKJYYW+gIWRKAmIzxpZ2kPbTJ4VdpqNO6uOOEWOpCZE+sV58Q1ERBZXEZ2I6MaVDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T03:43:22.303923Z"},"content_sha256":"0c881b21f3b8fb4cf100eb8a96585ba4529e6d368abf2a013384d9d30ac5af5a","schema_version":"1.0","event_id":"sha256:0c881b21f3b8fb4cf100eb8a96585ba4529e6d368abf2a013384d9d30ac5af5a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:OGMPHBZEXBZVQ2PBKHCKKFRPSE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Shaping Strategies in Human-in-the-loop Interactive Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.HC","authors_text":"Chao Yu, Dongxu Wang, Guangliang Li, Tianpei Yang, Wenxuan Zhu","submitted_at":"2018-11-10T15:26:31Z","abstract_excerpt":"Providing reinforcement learning agents with informationally rich human knowledge can dramatically improve various aspects of learning. Prior work has developed different kinds of shaping methods that enable agents to learn efficiently in complex environments. All these methods, however, tailor human guidance to agents in specialized shaping procedures, thus embodying various characteristics and advantages in different domains. In this paper, we investigate the interplay between different shaping methods for more robust learning performance. We propose an adaptive shaping algorithm which is ca"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04272","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7DVhOEba7gEFNGpHfzq4DJH6w84WmWImpI6aaxTeft7pL4N78I2lue8heHBEMVLAxHtTdAXNunWE0lL78d34AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T03:43:22.304593Z"},"content_sha256":"a8e54f88f87ca55382e757c6eddb3101c32251f189bc37dc2fb7e6bfe52d3976","schema_version":"1.0","event_id":"sha256:a8e54f88f87ca55382e757c6eddb3101c32251f189bc37dc2fb7e6bfe52d3976"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE/bundle.json","state_url":"https://pith.science/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T03:43:22Z","links":{"resolver":"https://pith.science/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE","bundle":"https://pith.science/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE/bundle.json","state":"https://pith.science/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OGMPHBZEXBZVQ2PBKHCKKFRPSE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:OGMPHBZEXBZVQ2PBKHCKKFRPSE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2147806537af8f993f2ef8eef17fd912504d5de8cd6ff1adee44dd85116954aa","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","title_canon_sha256":"ef9e8771583056d26b116fc72fb944aa72cf6e476d9f5513bc8b962ee9921c4a"},"schema_version":"1.0","source":{"id":"1811.04272","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04272","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04272v1","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04272","created_at":"2026-05-18T00:01:05Z"},{"alias_kind":"pith_short_12","alias_value":"OGMPHBZEXBZV","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_16","alias_value":"OGMPHBZEXBZVQ2PB","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_8","alias_value":"OGMPHBZE","created_at":"2026-05-18T12:32:43Z"}],"graph_snapshots":[{"event_id":"sha256:a8e54f88f87ca55382e757c6eddb3101c32251f189bc37dc2fb7e6bfe52d3976","target":"graph","created_at":"2026-05-18T00:01:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Providing reinforcement learning agents with informationally rich human knowledge can dramatically improve various aspects of learning. Prior work has developed different kinds of shaping methods that enable agents to learn efficiently in complex environments. All these methods, however, tailor human guidance to agents in specialized shaping procedures, thus embodying various characteristics and advantages in different domains. In this paper, we investigate the interplay between different shaping methods for more robust learning performance. We propose an adaptive shaping algorithm which is ca","authors_text":"Chao Yu, Dongxu Wang, Guangliang Li, Tianpei Yang, Wenxuan Zhu","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","title":"Learning Shaping Strategies in Human-in-the-loop Interactive Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04272","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0c881b21f3b8fb4cf100eb8a96585ba4529e6d368abf2a013384d9d30ac5af5a","target":"record","created_at":"2026-05-18T00:01:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2147806537af8f993f2ef8eef17fd912504d5de8cd6ff1adee44dd85116954aa","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2018-11-10T15:26:31Z","title_canon_sha256":"ef9e8771583056d26b116fc72fb944aa72cf6e476d9f5513bc8b962ee9921c4a"},"schema_version":"1.0","source":{"id":"1811.04272","kind":"arxiv","version":1}},"canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7198f38724b8735869e151c4a5162f9122ff1785c29cda30ca6eafffce0ef436","first_computed_at":"2026-05-18T00:01:05.782928Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:05.782928Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OB3AM1ZD3KaKl0O0AHeE1xGR2VprClwoEtcBvY8TTBo6L5Q1T3cBnmmyMLcElv3aTuC/+pBA9pJIuHZU9jtJAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:05.783546Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.04272","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0c881b21f3b8fb4cf100eb8a96585ba4529e6d368abf2a013384d9d30ac5af5a","sha256:a8e54f88f87ca55382e757c6eddb3101c32251f189bc37dc2fb7e6bfe52d3976"],"state_sha256":"9b70feebe302c6eb653894b079e5d754f6a5e32c93e1df208863c1f886bd3f91"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dFzbTx1uo9HWXSUMNnunXWEV0xpwf0Zxb9PPZUEGRgR4jO8tjve0w8StpuY+TR9lLbNG4cjGlGoZiteK7f7EAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T03:43:22.307894Z","bundle_sha256":"76cf87ad53abbaad127d34e1e8cb5d17d2b3e8bf640a334b42cc3cecdf197cb0"}}