{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:FYBCNGCGZIFF52XD6V74RK4NWJ","short_pith_number":"pith:FYBCNGCG","canonical_record":{"source":{"id":"1904.06879","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-15T07:17:20Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"0c78f2fb37006dae6d8ea9f6944a782b2d68860be37ff58e7b74e42d73c96541","abstract_canon_sha256":"be8ac5b78acfd8e1b8df4aa2d2e3e8937841241fa69bdbef3a96391be4ea092d"},"schema_version":"1.0"},"canonical_sha256":"2e02269846ca0a5eeae3f57fc8ab8db269b4e457a8614f1eae354bcb7b3f4caa","source":{"kind":"arxiv","id":"1904.06879","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.06879","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"arxiv_version","alias_value":"1904.06879v1","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.06879","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"pith_short_12","alias_value":"FYBCNGCGZIFF","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"FYBCNGCGZIFF52XD","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"FYBCNGCG","created_at":"2026-05-18T12:33:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:FYBCNGCGZIFF52XD6V74RK4NWJ","target":"record","payload":{"canonical_record":{"source":{"id":"1904.06879","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-15T07:17:20Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"0c78f2fb37006dae6d8ea9f6944a782b2d68860be37ff58e7b74e42d73c96541","abstract_canon_sha256":"be8ac5b78acfd8e1b8df4aa2d2e3e8937841241fa69bdbef3a96391be4ea092d"},"schema_version":"1.0"},"canonical_sha256":"2e02269846ca0a5eeae3f57fc8ab8db269b4e457a8614f1eae354bcb7b3f4caa","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:35.903065Z","signature_b64":"jNi5l9ssZk1+7wd5K8ATo2UiLARqw1XdS8M8JZsUlYdK6XBHFc2Wsqw5OBuA4+qebIMlxd8KPX0MVIRoqChzCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e02269846ca0a5eeae3f57fc8ab8db269b4e457a8614f1eae354bcb7b3f4caa","last_reissued_at":"2026-05-17T23:48:35.902420Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:35.902420Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.06879","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"r8+QNRkfb56YCJL0xpHEVt58Cm5SkIxJaEPx3M/dJtK5dJt+IhbqNdwOnlYzyFh+BFOakewYQFmrmmHpNMAuAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T07:49:19.671722Z"},"content_sha256":"0be8b90262d226188f7888292983219564e6cac88d0b2835919e9f6ff9da6bd2","schema_version":"1.0","event_id":"sha256:0be8b90262d226188f7888292983219564e6cac88d0b2835919e9f6ff9da6bd2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:FYBCNGCGZIFF52XD6V74RK4NWJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improving interactive reinforcement learning: What makes a good teacher?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.AI","authors_text":"Francisco Cruz, Stefan Wermter, Sven Magg, Yukie Nagai","submitted_at":"2019-04-15T07:17:20Z","abstract_excerpt":"Interactive reinforcement learning has become an important apprenticeship approach to speed up convergence in classic reinforcement learning problems. In this regard, a variant of interactive reinforcement learning is policy shaping which uses a parent-like trainer to propose the next action to be performed and by doing so reduces the search space by advice. On some occasions, the trainer may be another artificial agent which in turn was trained using reinforcement learning methods to afterward becoming an advisor for other learner-agents. In this work, we analyze internal representations and "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.06879","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"htiqm9yFNxWjqbz8sEX/SoUoVzDxsktoo2WpAxRvGs/+S2mdPH5lZMY8VzQ0PpOEtnxABUpT7NhqMkBhv/PuDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T07:49:19.672068Z"},"content_sha256":"9fbd40a05a8f526dadd69c50d0fb580e220a204b969cdd6dd392932cb2f3d0f0","schema_version":"1.0","event_id":"sha256:9fbd40a05a8f526dadd69c50d0fb580e220a204b969cdd6dd392932cb2f3d0f0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FYBCNGCGZIFF52XD6V74RK4NWJ/bundle.json","state_url":"https://pith.science/pith/FYBCNGCGZIFF52XD6V74RK4NWJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FYBCNGCGZIFF52XD6V74RK4NWJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T07:49:19Z","links":{"resolver":"https://pith.science/pith/FYBCNGCGZIFF52XD6V74RK4NWJ","bundle":"https://pith.science/pith/FYBCNGCGZIFF52XD6V74RK4NWJ/bundle.json","state":"https://pith.science/pith/FYBCNGCGZIFF52XD6V74RK4NWJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FYBCNGCGZIFF52XD6V74RK4NWJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:FYBCNGCGZIFF52XD6V74RK4NWJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"be8ac5b78acfd8e1b8df4aa2d2e3e8937841241fa69bdbef3a96391be4ea092d","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-15T07:17:20Z","title_canon_sha256":"0c78f2fb37006dae6d8ea9f6944a782b2d68860be37ff58e7b74e42d73c96541"},"schema_version":"1.0","source":{"id":"1904.06879","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.06879","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"arxiv_version","alias_value":"1904.06879v1","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.06879","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"pith_short_12","alias_value":"FYBCNGCGZIFF","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"FYBCNGCGZIFF52XD","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"FYBCNGCG","created_at":"2026-05-18T12:33:18Z"}],"graph_snapshots":[{"event_id":"sha256:9fbd40a05a8f526dadd69c50d0fb580e220a204b969cdd6dd392932cb2f3d0f0","target":"graph","created_at":"2026-05-17T23:48:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Interactive reinforcement learning has become an important apprenticeship approach to speed up convergence in classic reinforcement learning problems. In this regard, a variant of interactive reinforcement learning is policy shaping which uses a parent-like trainer to propose the next action to be performed and by doing so reduces the search space by advice. On some occasions, the trainer may be another artificial agent which in turn was trained using reinforcement learning methods to afterward becoming an advisor for other learner-agents. In this work, we analyze internal representations and ","authors_text":"Francisco Cruz, Stefan Wermter, Sven Magg, Yukie Nagai","cross_cats":["cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-15T07:17:20Z","title":"Improving interactive reinforcement learning: What makes a good teacher?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.06879","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0be8b90262d226188f7888292983219564e6cac88d0b2835919e9f6ff9da6bd2","target":"record","created_at":"2026-05-17T23:48:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"be8ac5b78acfd8e1b8df4aa2d2e3e8937841241fa69bdbef3a96391be4ea092d","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-04-15T07:17:20Z","title_canon_sha256":"0c78f2fb37006dae6d8ea9f6944a782b2d68860be37ff58e7b74e42d73c96541"},"schema_version":"1.0","source":{"id":"1904.06879","kind":"arxiv","version":1}},"canonical_sha256":"2e02269846ca0a5eeae3f57fc8ab8db269b4e457a8614f1eae354bcb7b3f4caa","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2e02269846ca0a5eeae3f57fc8ab8db269b4e457a8614f1eae354bcb7b3f4caa","first_computed_at":"2026-05-17T23:48:35.902420Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:48:35.902420Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jNi5l9ssZk1+7wd5K8ATo2UiLARqw1XdS8M8JZsUlYdK6XBHFc2Wsqw5OBuA4+qebIMlxd8KPX0MVIRoqChzCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:48:35.903065Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.06879","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0be8b90262d226188f7888292983219564e6cac88d0b2835919e9f6ff9da6bd2","sha256:9fbd40a05a8f526dadd69c50d0fb580e220a204b969cdd6dd392932cb2f3d0f0"],"state_sha256":"ffd5f79806c0084b3d837c5de7e3252a96bba3fefadfaa4349649b8b4feff345"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A0Dn5AyYClP8a47NJuEBPBp2x6RqE1lNFk46B3yTSf8P6nwq7me9D3sO2WKRcaN7hCYhIPNph0bOxgQ40db2BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T07:49:19.674006Z","bundle_sha256":"5089e125ea11c8f610d81eb461850b446948c109c7706be2c4ba9a122581307a"}}