{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:4ASLQN4Y5426QHPVA7RQXYSCNC","short_pith_number":"pith:4ASLQN4Y","canonical_record":{"source":{"id":"1707.09079","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-28T00:33:53Z","cross_cats_sorted":[],"title_canon_sha256":"7ffdb3cf23c808d600ad8126034d5b2f1cb24b64f686498f759a2a47ee77fc1d","abstract_canon_sha256":"f81af70c42812080d6d7dd57f43224afb124934778c4c4f5f7b8a4ad3e20bef4"},"schema_version":"1.0"},"canonical_sha256":"e024b83798ef35e81df507e30be242688c4e9a4774c1c4fb39f2d1c31e937bce","source":{"kind":"arxiv","id":"1707.09079","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.09079","created_at":"2026-05-18T00:28:22Z"},{"alias_kind":"arxiv_version","alias_value":"1707.09079v1","created_at":"2026-05-18T00:28:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.09079","created_at":"2026-05-18T00:28:22Z"},{"alias_kind":"pith_short_12","alias_value":"4ASLQN4Y5426","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_16","alias_value":"4ASLQN4Y5426QHPV","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_8","alias_value":"4ASLQN4Y","created_at":"2026-05-18T12:30:58Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:4ASLQN4Y5426QHPVA7RQXYSCNC","target":"record","payload":{"canonical_record":{"source":{"id":"1707.09079","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-28T00:33:53Z","cross_cats_sorted":[],"title_canon_sha256":"7ffdb3cf23c808d600ad8126034d5b2f1cb24b64f686498f759a2a47ee77fc1d","abstract_canon_sha256":"f81af70c42812080d6d7dd57f43224afb124934778c4c4f5f7b8a4ad3e20bef4"},"schema_version":"1.0"},"canonical_sha256":"e024b83798ef35e81df507e30be242688c4e9a4774c1c4fb39f2d1c31e937bce","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:28:22.891800Z","signature_b64":"HGXraLxra+K4AgDUQfx8uK/y96yLMOqfjER+zSaLC21AmiWC1zeBBrZrxxd8BKqSpafc9bLgcB9POBo9UIffBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e024b83798ef35e81df507e30be242688c4e9a4774c1c4fb39f2d1c31e937bce","last_reissued_at":"2026-05-18T00:28:22.891272Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:28:22.891272Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.09079","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0OXKUgGdhRNooB2EneMcaZ/3jpC+NF+MABTHQMwF5MNMIyFBjCPZfLyAnv0VSJDnHsu7cawWLYElHQoHHUvzAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T10:41:15.850006Z"},"content_sha256":"e6e3033f7a8ae84fcfc6b3b24a86950b1711c3679e185b4a5c90562a60079caa","schema_version":"1.0","event_id":"sha256:e6e3033f7a8ae84fcfc6b3b24a86950b1711c3679e185b4a5c90562a60079caa"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:4ASLQN4Y5426QHPVA7RQXYSCNC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning to Teach Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anestis Fachantidis, Ioannis Vlahavas, Matthew E. Taylor","submitted_at":"2017-07-28T00:33:53Z","abstract_excerpt":"In this article we study the transfer learning model of action advice under a budget. We focus on reinforcement learning teachers providing action advice to heterogeneous students playing the game of Pac-Man under a limited advice budget. First, we examine several critical factors affecting advice quality in this setting, such as the average performance of the teacher, its variance and the importance of reward discounting in advising. The experiments show the non-trivial importance of the coefficient of variation (CV) as a statistic for choosing policies that generate advice. The CV statistic "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.09079","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dvzyNCHlyVj7ZqNljG6r2tQuAoG3WkC510n4SIWFC54vJl6ZSfYdJdT6VEPI8U7rmHaNkA/vgkyS/B/wjFWIAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T10:41:15.850562Z"},"content_sha256":"8b0f4a49d309d2787e31f7e50082e189f14ce924f71af3ec78972f3a41c8d6e6","schema_version":"1.0","event_id":"sha256:8b0f4a49d309d2787e31f7e50082e189f14ce924f71af3ec78972f3a41c8d6e6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4ASLQN4Y5426QHPVA7RQXYSCNC/bundle.json","state_url":"https://pith.science/pith/4ASLQN4Y5426QHPVA7RQXYSCNC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4ASLQN4Y5426QHPVA7RQXYSCNC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T10:41:15Z","links":{"resolver":"https://pith.science/pith/4ASLQN4Y5426QHPVA7RQXYSCNC","bundle":"https://pith.science/pith/4ASLQN4Y5426QHPVA7RQXYSCNC/bundle.json","state":"https://pith.science/pith/4ASLQN4Y5426QHPVA7RQXYSCNC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4ASLQN4Y5426QHPVA7RQXYSCNC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:4ASLQN4Y5426QHPVA7RQXYSCNC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f81af70c42812080d6d7dd57f43224afb124934778c4c4f5f7b8a4ad3e20bef4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-28T00:33:53Z","title_canon_sha256":"7ffdb3cf23c808d600ad8126034d5b2f1cb24b64f686498f759a2a47ee77fc1d"},"schema_version":"1.0","source":{"id":"1707.09079","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.09079","created_at":"2026-05-18T00:28:22Z"},{"alias_kind":"arxiv_version","alias_value":"1707.09079v1","created_at":"2026-05-18T00:28:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.09079","created_at":"2026-05-18T00:28:22Z"},{"alias_kind":"pith_short_12","alias_value":"4ASLQN4Y5426","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_16","alias_value":"4ASLQN4Y5426QHPV","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_8","alias_value":"4ASLQN4Y","created_at":"2026-05-18T12:30:58Z"}],"graph_snapshots":[{"event_id":"sha256:8b0f4a49d309d2787e31f7e50082e189f14ce924f71af3ec78972f3a41c8d6e6","target":"graph","created_at":"2026-05-18T00:28:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this article we study the transfer learning model of action advice under a budget. We focus on reinforcement learning teachers providing action advice to heterogeneous students playing the game of Pac-Man under a limited advice budget. First, we examine several critical factors affecting advice quality in this setting, such as the average performance of the teacher, its variance and the importance of reward discounting in advising. The experiments show the non-trivial importance of the coefficient of variation (CV) as a statistic for choosing policies that generate advice. The CV statistic ","authors_text":"Anestis Fachantidis, Ioannis Vlahavas, Matthew E. Taylor","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-28T00:33:53Z","title":"Learning to Teach Reinforcement Learning Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.09079","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e6e3033f7a8ae84fcfc6b3b24a86950b1711c3679e185b4a5c90562a60079caa","target":"record","created_at":"2026-05-18T00:28:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f81af70c42812080d6d7dd57f43224afb124934778c4c4f5f7b8a4ad3e20bef4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-28T00:33:53Z","title_canon_sha256":"7ffdb3cf23c808d600ad8126034d5b2f1cb24b64f686498f759a2a47ee77fc1d"},"schema_version":"1.0","source":{"id":"1707.09079","kind":"arxiv","version":1}},"canonical_sha256":"e024b83798ef35e81df507e30be242688c4e9a4774c1c4fb39f2d1c31e937bce","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e024b83798ef35e81df507e30be242688c4e9a4774c1c4fb39f2d1c31e937bce","first_computed_at":"2026-05-18T00:28:22.891272Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:28:22.891272Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HGXraLxra+K4AgDUQfx8uK/y96yLMOqfjER+zSaLC21AmiWC1zeBBrZrxxd8BKqSpafc9bLgcB9POBo9UIffBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:28:22.891800Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.09079","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e6e3033f7a8ae84fcfc6b3b24a86950b1711c3679e185b4a5c90562a60079caa","sha256:8b0f4a49d309d2787e31f7e50082e189f14ce924f71af3ec78972f3a41c8d6e6"],"state_sha256":"256286a861bbf0c50c9ca64908fa45f629465a989e86fcc6d730f3aeb36bbe43"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lG24GGRgXCadI926zS70ZfMxF+8vjq2YsN5bQX+JGybO0eQAWhT0zURJEV1XLlcCTVv527qFru81YRM5gfldBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T10:41:15.854961Z","bundle_sha256":"a6601d96736980db759fb722b5708bcb0e0f0a288d6299f9d4b25a922eb9d9ba"}}