{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:KYLFNJ3IOCAEM5RFCSLPM4FKQB","short_pith_number":"pith:KYLFNJ3I","canonical_record":{"source":{"id":"1904.02206","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-03T19:14:15Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"48cf88b49b835a1b3358aeeeb1873c2955a0c804f84bbe4fbd84eedc675ef2d6","abstract_canon_sha256":"68c7eba5b1289ad6a6b8e351f223089b42841e6a240fac4fbf4f309497781334"},"schema_version":"1.0"},"canonical_sha256":"561656a76870804676251496f670aa806b20515f3f09b9c9a6f9e068e0797e77","source":{"kind":"arxiv","id":"1904.02206","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.02206","created_at":"2026-05-17T23:49:24Z"},{"alias_kind":"arxiv_version","alias_value":"1904.02206v1","created_at":"2026-05-17T23:49:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.02206","created_at":"2026-05-17T23:49:24Z"},{"alias_kind":"pith_short_12","alias_value":"KYLFNJ3IOCAE","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"KYLFNJ3IOCAEM5RF","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"KYLFNJ3I","created_at":"2026-05-18T12:33:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:KYLFNJ3IOCAEM5RFCSLPM4FKQB","target":"record","payload":{"canonical_record":{"source":{"id":"1904.02206","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-03T19:14:15Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"48cf88b49b835a1b3358aeeeb1873c2955a0c804f84bbe4fbd84eedc675ef2d6","abstract_canon_sha256":"68c7eba5b1289ad6a6b8e351f223089b42841e6a240fac4fbf4f309497781334"},"schema_version":"1.0"},"canonical_sha256":"561656a76870804676251496f670aa806b20515f3f09b9c9a6f9e068e0797e77","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:24.217352Z","signature_b64":"jz6GB4R1Z+QXNGCn0az7hAvfLotFpm1lq+UZlUg8TCyW4c4PaniguX+zqS7iE5sKZ92rv6gCCe+GeWC/IdipCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"561656a76870804676251496f670aa806b20515f3f09b9c9a6f9e068e0797e77","last_reissued_at":"2026-05-17T23:49:24.216974Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:24.216974Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.02206","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hsWSBtAEG0iAe//QY0+U+VC4/jQSrAnE8QaTjkGio+Qqx3SYo32bBoycIZjH4hMwP92P2JqZgrla910Tk2N6Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T07:14:45.681306Z"},"content_sha256":"e2159fdff1efe91690538ee6d84dd4e7e55d71ff2b498afa17ca63f74ebeadc8","schema_version":"1.0","event_id":"sha256:e2159fdff1efe91690538ee6d84dd4e7e55d71ff2b498afa17ca63f74ebeadc8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:KYLFNJ3IOCAEM5RFCSLPM4FKQB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Jointly Pre-training with Supervised, Autoencoder, and Value Losses for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Gabriel V. de la Cruz Jr., Matthew E. Taylor, Yunshu Du","submitted_at":"2019-04-03T19:14:15Z","abstract_excerpt":"Deep Reinforcement Learning (DRL) algorithms are known to be data inefficient. One reason is that a DRL agent learns both the feature and the policy tabula rasa. Integrating prior knowledge into DRL algorithms is one way to improve learning efficiency since it helps to build helpful representations. In this work, we consider incorporating human knowledge to accelerate the asynchronous advantage actor-critic (A3C) algorithm by pre-training a small amount of non-expert human demonstrations. We leverage the supervised autoencoder framework and propose a novel pre-training strategy that jointly tr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.02206","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m/SGVG2Zvza77gqOetcvbU6muj8Oih89K5uuRapXNizDsa4S37SfcLrM5hpW5X0aU440VxJ5DaVp3rWQzkBLAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T07:14:45.681677Z"},"content_sha256":"0c1cafa1113eb21d88212434913dd3a30a9dbb5506acae44dd5f7a3aecc9ea9e","schema_version":"1.0","event_id":"sha256:0c1cafa1113eb21d88212434913dd3a30a9dbb5506acae44dd5f7a3aecc9ea9e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB/bundle.json","state_url":"https://pith.science/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T07:14:45Z","links":{"resolver":"https://pith.science/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB","bundle":"https://pith.science/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB/bundle.json","state":"https://pith.science/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KYLFNJ3IOCAEM5RFCSLPM4FKQB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:KYLFNJ3IOCAEM5RFCSLPM4FKQB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"68c7eba5b1289ad6a6b8e351f223089b42841e6a240fac4fbf4f309497781334","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-03T19:14:15Z","title_canon_sha256":"48cf88b49b835a1b3358aeeeb1873c2955a0c804f84bbe4fbd84eedc675ef2d6"},"schema_version":"1.0","source":{"id":"1904.02206","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.02206","created_at":"2026-05-17T23:49:24Z"},{"alias_kind":"arxiv_version","alias_value":"1904.02206v1","created_at":"2026-05-17T23:49:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.02206","created_at":"2026-05-17T23:49:24Z"},{"alias_kind":"pith_short_12","alias_value":"KYLFNJ3IOCAE","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"KYLFNJ3IOCAEM5RF","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"KYLFNJ3I","created_at":"2026-05-18T12:33:21Z"}],"graph_snapshots":[{"event_id":"sha256:0c1cafa1113eb21d88212434913dd3a30a9dbb5506acae44dd5f7a3aecc9ea9e","target":"graph","created_at":"2026-05-17T23:49:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep Reinforcement Learning (DRL) algorithms are known to be data inefficient. One reason is that a DRL agent learns both the feature and the policy tabula rasa. Integrating prior knowledge into DRL algorithms is one way to improve learning efficiency since it helps to build helpful representations. In this work, we consider incorporating human knowledge to accelerate the asynchronous advantage actor-critic (A3C) algorithm by pre-training a small amount of non-expert human demonstrations. We leverage the supervised autoencoder framework and propose a novel pre-training strategy that jointly tr","authors_text":"Gabriel V. de la Cruz Jr., Matthew E. Taylor, Yunshu Du","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-03T19:14:15Z","title":"Jointly Pre-training with Supervised, Autoencoder, and Value Losses for Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.02206","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e2159fdff1efe91690538ee6d84dd4e7e55d71ff2b498afa17ca63f74ebeadc8","target":"record","created_at":"2026-05-17T23:49:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"68c7eba5b1289ad6a6b8e351f223089b42841e6a240fac4fbf4f309497781334","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-03T19:14:15Z","title_canon_sha256":"48cf88b49b835a1b3358aeeeb1873c2955a0c804f84bbe4fbd84eedc675ef2d6"},"schema_version":"1.0","source":{"id":"1904.02206","kind":"arxiv","version":1}},"canonical_sha256":"561656a76870804676251496f670aa806b20515f3f09b9c9a6f9e068e0797e77","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"561656a76870804676251496f670aa806b20515f3f09b9c9a6f9e068e0797e77","first_computed_at":"2026-05-17T23:49:24.216974Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:49:24.216974Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jz6GB4R1Z+QXNGCn0az7hAvfLotFpm1lq+UZlUg8TCyW4c4PaniguX+zqS7iE5sKZ92rv6gCCe+GeWC/IdipCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:49:24.217352Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.02206","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e2159fdff1efe91690538ee6d84dd4e7e55d71ff2b498afa17ca63f74ebeadc8","sha256:0c1cafa1113eb21d88212434913dd3a30a9dbb5506acae44dd5f7a3aecc9ea9e"],"state_sha256":"551844f3d452d592b6606d166cc46b782f62100c667188d22fc2b96072b331d9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"apHmDhzUpYfh0ukuv81TTLxkOx/5fffj3R4rkapdPS3sWbuN+EGixNdtQDLV6W5l12Hfb7aCtk4WAzzsJRX/Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T07:14:45.683872Z","bundle_sha256":"0751ab14cf74a22120106b1f5d07ce0a94bf09c9349fa96ca2e4e96d908eee5a"}}