{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:R6U7LZPQAO62UAN2KC4UQEAAKN","short_pith_number":"pith:R6U7LZPQ","canonical_record":{"source":{"id":"1901.09330","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-27T06:38:24Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"f49fe3361bea2d625b9656d54055742cf0df2b6633530b72539f80e8598b9fe7","abstract_canon_sha256":"d7a66f7b54f22ceb7cdcc087056185d2efe60aa13dbe1add49a8e6b9d447ed09"},"schema_version":"1.0"},"canonical_sha256":"8fa9f5e5f003bdaa01ba50b9481000536ddec03f220a33d18086d03ef1e90c09","source":{"kind":"arxiv","id":"1901.09330","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.09330","created_at":"2026-05-17T23:55:25Z"},{"alias_kind":"arxiv_version","alias_value":"1901.09330v1","created_at":"2026-05-17T23:55:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.09330","created_at":"2026-05-17T23:55:25Z"},{"alias_kind":"pith_short_12","alias_value":"R6U7LZPQAO62","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"R6U7LZPQAO62UAN2","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"R6U7LZPQ","created_at":"2026-05-18T12:33:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:R6U7LZPQAO62UAN2KC4UQEAAKN","target":"record","payload":{"canonical_record":{"source":{"id":"1901.09330","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-27T06:38:24Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"f49fe3361bea2d625b9656d54055742cf0df2b6633530b72539f80e8598b9fe7","abstract_canon_sha256":"d7a66f7b54f22ceb7cdcc087056185d2efe60aa13dbe1add49a8e6b9d447ed09"},"schema_version":"1.0"},"canonical_sha256":"8fa9f5e5f003bdaa01ba50b9481000536ddec03f220a33d18086d03ef1e90c09","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:25.112825Z","signature_b64":"kZFxcgeCkAdnLXxu+Fw9vv2s1P0BYvAqaLH7sAU0hAHVyzDAE8VBoUD7gfcdxCVQzOGa0dvFmJI0bcqBb/A4CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8fa9f5e5f003bdaa01ba50b9481000536ddec03f220a33d18086d03ef1e90c09","last_reissued_at":"2026-05-17T23:55:25.112333Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:25.112333Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.09330","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:55:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"01hg1NCFzy1k91lVH+NZoN5aQFP/2z6C0pTYdwsNL5KqlnoDYkUWWvWZ96sCFQAwFnn41uy6CxjkBnWz9T+aCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T22:14:12.618766Z"},"content_sha256":"680c8ed82d42e3e1363f35edf60492be4275c55c7f569548dfee0ff542e8be5d","schema_version":"1.0","event_id":"sha256:680c8ed82d42e3e1363f35edf60492be4275c55c7f569548dfee0ff542e8be5d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:R6U7LZPQAO62UAN2KC4UQEAAKN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reward Shaping via Meta-Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Dong Yan, Hang Su, Haosheng Zou, Jun Zhu, Tongzheng Ren","submitted_at":"2019-01-27T06:38:24Z","abstract_excerpt":"Reward shaping is one of the most effective methods to tackle the crucial yet challenging problem of credit assignment in Reinforcement Learning (RL). However, designing shaping functions usually requires much expert knowledge and hand-engineering, and the difficulties are further exacerbated given multiple similar tasks to solve. In this paper, we consider reward shaping on a distribution of tasks, and propose a general meta-learning framework to automatically learn the efficient reward shaping on newly sampled tasks, assuming only shared state space but not necessarily action space. We first"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.09330","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:55:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"syDh4ilJos3oF8tL7wn73gm+I2veqeAeveFOnjqndOcN/FD+sdz0vSjyc1VbePVdVlpgS4xYVWBAIS3eOcDKBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T22:14:12.619120Z"},"content_sha256":"777f248610d286e0158410c9020029048c13ab95bcc770df9c16f0063b5e93d6","schema_version":"1.0","event_id":"sha256:777f248610d286e0158410c9020029048c13ab95bcc770df9c16f0063b5e93d6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R6U7LZPQAO62UAN2KC4UQEAAKN/bundle.json","state_url":"https://pith.science/pith/R6U7LZPQAO62UAN2KC4UQEAAKN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R6U7LZPQAO62UAN2KC4UQEAAKN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T22:14:12Z","links":{"resolver":"https://pith.science/pith/R6U7LZPQAO62UAN2KC4UQEAAKN","bundle":"https://pith.science/pith/R6U7LZPQAO62UAN2KC4UQEAAKN/bundle.json","state":"https://pith.science/pith/R6U7LZPQAO62UAN2KC4UQEAAKN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R6U7LZPQAO62UAN2KC4UQEAAKN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:R6U7LZPQAO62UAN2KC4UQEAAKN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d7a66f7b54f22ceb7cdcc087056185d2efe60aa13dbe1add49a8e6b9d447ed09","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-27T06:38:24Z","title_canon_sha256":"f49fe3361bea2d625b9656d54055742cf0df2b6633530b72539f80e8598b9fe7"},"schema_version":"1.0","source":{"id":"1901.09330","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.09330","created_at":"2026-05-17T23:55:25Z"},{"alias_kind":"arxiv_version","alias_value":"1901.09330v1","created_at":"2026-05-17T23:55:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.09330","created_at":"2026-05-17T23:55:25Z"},{"alias_kind":"pith_short_12","alias_value":"R6U7LZPQAO62","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"R6U7LZPQAO62UAN2","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"R6U7LZPQ","created_at":"2026-05-18T12:33:27Z"}],"graph_snapshots":[{"event_id":"sha256:777f248610d286e0158410c9020029048c13ab95bcc770df9c16f0063b5e93d6","target":"graph","created_at":"2026-05-17T23:55:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reward shaping is one of the most effective methods to tackle the crucial yet challenging problem of credit assignment in Reinforcement Learning (RL). However, designing shaping functions usually requires much expert knowledge and hand-engineering, and the difficulties are further exacerbated given multiple similar tasks to solve. In this paper, we consider reward shaping on a distribution of tasks, and propose a general meta-learning framework to automatically learn the efficient reward shaping on newly sampled tasks, assuming only shared state space but not necessarily action space. We first","authors_text":"Dong Yan, Hang Su, Haosheng Zou, Jun Zhu, Tongzheng Ren","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-27T06:38:24Z","title":"Reward Shaping via Meta-Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.09330","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:680c8ed82d42e3e1363f35edf60492be4275c55c7f569548dfee0ff542e8be5d","target":"record","created_at":"2026-05-17T23:55:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d7a66f7b54f22ceb7cdcc087056185d2efe60aa13dbe1add49a8e6b9d447ed09","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-27T06:38:24Z","title_canon_sha256":"f49fe3361bea2d625b9656d54055742cf0df2b6633530b72539f80e8598b9fe7"},"schema_version":"1.0","source":{"id":"1901.09330","kind":"arxiv","version":1}},"canonical_sha256":"8fa9f5e5f003bdaa01ba50b9481000536ddec03f220a33d18086d03ef1e90c09","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8fa9f5e5f003bdaa01ba50b9481000536ddec03f220a33d18086d03ef1e90c09","first_computed_at":"2026-05-17T23:55:25.112333Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:55:25.112333Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kZFxcgeCkAdnLXxu+Fw9vv2s1P0BYvAqaLH7sAU0hAHVyzDAE8VBoUD7gfcdxCVQzOGa0dvFmJI0bcqBb/A4CQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:55:25.112825Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.09330","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:680c8ed82d42e3e1363f35edf60492be4275c55c7f569548dfee0ff542e8be5d","sha256:777f248610d286e0158410c9020029048c13ab95bcc770df9c16f0063b5e93d6"],"state_sha256":"95528650ae5bc7800f84661f22006a1635153735ec5c8d7152eb7fe6109dfbac"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v5Br8xjMgtOQaLJ5zC17xg9BEB/qR3+Q6n7pk7xicQ9A4w9Spks5EUAwWxZsMIVnVEXidVl4XZ0pv1qD3Y5NDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T22:14:12.621271Z","bundle_sha256":"5c7f94f5a29855f2dc5a2c3fa9ca60c3d14ae774f59420149a168c55fdf297b9"}}