{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:AXLYFOLRWMMY2GPCRAEBC7LDJ6","short_pith_number":"pith:AXLYFOLR","canonical_record":{"source":{"id":"1709.10163","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-28T20:43:40Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"bedf0ec50bf5cb392193320a71408c9ad96ac11a65c02efcc97d1c889737c843","abstract_canon_sha256":"adfc13cd956993cb76cc92dc23064ced67cd2ccb94dd3fa37e1307ea41b539d4"},"schema_version":"1.0"},"canonical_sha256":"05d782b971b3198d19e28808117d634fb63ceb322c58b8ac3a95e91741de723c","source":{"kind":"arxiv","id":"1709.10163","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.10163","created_at":"2026-05-18T00:25:28Z"},{"alias_kind":"arxiv_version","alias_value":"1709.10163v2","created_at":"2026-05-18T00:25:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.10163","created_at":"2026-05-18T00:25:28Z"},{"alias_kind":"pith_short_12","alias_value":"AXLYFOLRWMMY","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"AXLYFOLRWMMY2GPC","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"AXLYFOLR","created_at":"2026-05-18T12:31:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:AXLYFOLRWMMY2GPCRAEBC7LDJ6","target":"record","payload":{"canonical_record":{"source":{"id":"1709.10163","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-28T20:43:40Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"bedf0ec50bf5cb392193320a71408c9ad96ac11a65c02efcc97d1c889737c843","abstract_canon_sha256":"adfc13cd956993cb76cc92dc23064ced67cd2ccb94dd3fa37e1307ea41b539d4"},"schema_version":"1.0"},"canonical_sha256":"05d782b971b3198d19e28808117d634fb63ceb322c58b8ac3a95e91741de723c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:25:28.759339Z","signature_b64":"CLxEAoxJoPMGC4rUEksWrKrHnGS6QXRLgjHGS0HbPFCJMM3fAuZ2xLbwsQ4tFjEBXPF1sTmOGOqFuX/bRi+aDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"05d782b971b3198d19e28808117d634fb63ceb322c58b8ac3a95e91741de723c","last_reissued_at":"2026-05-18T00:25:28.758403Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:25:28.758403Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.10163","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:25:28Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9FpvqD7hOXKN0dSjQnBtQGP0IT+axeg536NaulOZsnUEpaQuuxHkHqgThSTkTSXA7PKggndhADlRtrvwQ+vTDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T20:46:30.393882Z"},"content_sha256":"bbf8a3114b8c2be3db7bf70558ad85232e4b35f3023b0653436afd0601f604e8","schema_version":"1.0","event_id":"sha256:bbf8a3114b8c2be3db7bf70558ad85232e4b35f3023b0653436afd0601f604e8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:AXLYFOLRWMMY2GPCRAEBC7LDJ6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Deep TAMER: Interactive Agent Shaping in High-Dimensional State Spaces","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Garrett Warnell, Nicholas Waytowich, Peter Stone, Vernon Lawhern","submitted_at":"2017-09-28T20:43:40Z","abstract_excerpt":"While recent advances in deep reinforcement learning have allowed autonomous learning agents to succeed at a variety of complex tasks, existing algorithms generally require a lot of training data. One way to increase the speed at which agents are able to learn to perform tasks is by leveraging the input of human trainers. Although such input can take many forms, real-time, scalar-valued feedback is especially useful in situations where it proves difficult or impossible for humans to provide expert demonstrations. Previous approaches have shown the usefulness of human input provided in this fas"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.10163","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:25:28Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a7njNYWhQGHBOCXnOVpY8/JJLhYwyFh5rSrX9C+qKlTkq1wdJGLGgfxL/Y/kOuiXGEaU2jmoRWQLsUf1zxhtBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T20:46:30.394663Z"},"content_sha256":"c1afbf56a6e6aff8a95382b4afa7a7755f223d0658c93370f88be85c3127d42c","schema_version":"1.0","event_id":"sha256:c1afbf56a6e6aff8a95382b4afa7a7755f223d0658c93370f88be85c3127d42c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6/bundle.json","state_url":"https://pith.science/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T20:46:30Z","links":{"resolver":"https://pith.science/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6","bundle":"https://pith.science/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6/bundle.json","state":"https://pith.science/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AXLYFOLRWMMY2GPCRAEBC7LDJ6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:AXLYFOLRWMMY2GPCRAEBC7LDJ6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"adfc13cd956993cb76cc92dc23064ced67cd2ccb94dd3fa37e1307ea41b539d4","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-28T20:43:40Z","title_canon_sha256":"bedf0ec50bf5cb392193320a71408c9ad96ac11a65c02efcc97d1c889737c843"},"schema_version":"1.0","source":{"id":"1709.10163","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.10163","created_at":"2026-05-18T00:25:28Z"},{"alias_kind":"arxiv_version","alias_value":"1709.10163v2","created_at":"2026-05-18T00:25:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.10163","created_at":"2026-05-18T00:25:28Z"},{"alias_kind":"pith_short_12","alias_value":"AXLYFOLRWMMY","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"AXLYFOLRWMMY2GPC","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"AXLYFOLR","created_at":"2026-05-18T12:31:08Z"}],"graph_snapshots":[{"event_id":"sha256:c1afbf56a6e6aff8a95382b4afa7a7755f223d0658c93370f88be85c3127d42c","target":"graph","created_at":"2026-05-18T00:25:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"While recent advances in deep reinforcement learning have allowed autonomous learning agents to succeed at a variety of complex tasks, existing algorithms generally require a lot of training data. One way to increase the speed at which agents are able to learn to perform tasks is by leveraging the input of human trainers. Although such input can take many forms, real-time, scalar-valued feedback is especially useful in situations where it proves difficult or impossible for humans to provide expert demonstrations. Previous approaches have shown the usefulness of human input provided in this fas","authors_text":"Garrett Warnell, Nicholas Waytowich, Peter Stone, Vernon Lawhern","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-28T20:43:40Z","title":"Deep TAMER: Interactive Agent Shaping in High-Dimensional State Spaces"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.10163","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bbf8a3114b8c2be3db7bf70558ad85232e4b35f3023b0653436afd0601f604e8","target":"record","created_at":"2026-05-18T00:25:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"adfc13cd956993cb76cc92dc23064ced67cd2ccb94dd3fa37e1307ea41b539d4","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-28T20:43:40Z","title_canon_sha256":"bedf0ec50bf5cb392193320a71408c9ad96ac11a65c02efcc97d1c889737c843"},"schema_version":"1.0","source":{"id":"1709.10163","kind":"arxiv","version":2}},"canonical_sha256":"05d782b971b3198d19e28808117d634fb63ceb322c58b8ac3a95e91741de723c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"05d782b971b3198d19e28808117d634fb63ceb322c58b8ac3a95e91741de723c","first_computed_at":"2026-05-18T00:25:28.758403Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:25:28.758403Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"CLxEAoxJoPMGC4rUEksWrKrHnGS6QXRLgjHGS0HbPFCJMM3fAuZ2xLbwsQ4tFjEBXPF1sTmOGOqFuX/bRi+aDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:25:28.759339Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.10163","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bbf8a3114b8c2be3db7bf70558ad85232e4b35f3023b0653436afd0601f604e8","sha256:c1afbf56a6e6aff8a95382b4afa7a7755f223d0658c93370f88be85c3127d42c"],"state_sha256":"c9582d0247bd6a89514931bb980677962a35ad04dda882a62e911b46c23a13e0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kJ3JG++zP4NwwqVAk4orvjW1RsMxyZRMawHv7lGDBRFAozFIubocwKO8Mo35+I6053ElLycx7XzAGHMkUFUABw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T20:46:30.398660Z","bundle_sha256":"c22f2111b1d8876bc6f0da991f99cd76bcf758cc120c1112c33b4314851f347e"}}