{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:RFUGBEQ7OZGLEXLGJF463MKZXV","short_pith_number":"pith:RFUGBEQ7","canonical_record":{"source":{"id":"1809.06364","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-17T17:59:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"e8430dc219f0831c8b519749c2d973af1c5c963132235edf34183100864f401f","abstract_canon_sha256":"6eb46261c5a75ab61417cf7088fa89fae70f4503c55599eec39cbfe306c9d0c3"},"schema_version":"1.0"},"canonical_sha256":"896860921f764cb25d664979edb159bd68d6188f99245ec577809b45b00d0f6c","source":{"kind":"arxiv","id":"1809.06364","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.06364","created_at":"2026-05-18T00:05:34Z"},{"alias_kind":"arxiv_version","alias_value":"1809.06364v1","created_at":"2026-05-18T00:05:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.06364","created_at":"2026-05-18T00:05:34Z"},{"alias_kind":"pith_short_12","alias_value":"RFUGBEQ7OZGL","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"RFUGBEQ7OZGLEXLG","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"RFUGBEQ7","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:RFUGBEQ7OZGLEXLGJF463MKZXV","target":"record","payload":{"canonical_record":{"source":{"id":"1809.06364","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-17T17:59:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"e8430dc219f0831c8b519749c2d973af1c5c963132235edf34183100864f401f","abstract_canon_sha256":"6eb46261c5a75ab61417cf7088fa89fae70f4503c55599eec39cbfe306c9d0c3"},"schema_version":"1.0"},"canonical_sha256":"896860921f764cb25d664979edb159bd68d6188f99245ec577809b45b00d0f6c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:05:34.913776Z","signature_b64":"16Qxt30QP1RNs9177POqDr4uMSdi5akEFXaC78ulTYTelAuTg5QlY5m+iGHQn+kwivrqQMtESHumZWYSoULmAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"896860921f764cb25d664979edb159bd68d6188f99245ec577809b45b00d0f6c","last_reissued_at":"2026-05-18T00:05:34.913100Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:05:34.913100Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.06364","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:05:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AyIWUnGGIz+VxTPChVR69sD+9EqOPisYLa0g8k/Z9KtlkrjA7T+B6Aa0JhPFsWQ6EuD2u1VdAh25DVN9yPmxAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T05:03:34.541525Z"},"content_sha256":"9b721f4cccefd4f7b8a95d5d58cd93c26d07c613d31aa2f18b925db6d0fad0ed","schema_version":"1.0","event_id":"sha256:9b721f4cccefd4f7b8a95d5d58cd93c26d07c613d31aa2f18b925db6d0fad0ed"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:RFUGBEQ7OZGLEXLGJF463MKZXV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Generalizing Across Multi-Objective Reward Functions in Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Eli Friedman, Fred Fontaine","submitted_at":"2018-09-17T17:59:13Z","abstract_excerpt":"Many reinforcement-learning researchers treat the reward function as a part of the environment, meaning that the agent can only know the reward of a state if it encounters that state in a trial run. However, we argue that this is an unnecessary limitation and instead, the reward function should be provided to the learning algorithm. The advantage is that the algorithm can then use the reward function to check the reward for states that the agent hasn't even encountered yet. In addition, the algorithm can simultaneously learn policies for multiple reward functions. For each state, the algorithm"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.06364","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:05:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0uPH65lhYhAi8lJLgGM1cAM1RdyrXLtg3L37kTrFOu2TLKkDxdjR8JRAnjVwh0TtjgIOBstmPKUwe4pNMlx3Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T05:03:34.541872Z"},"content_sha256":"442012220d2d0cff96bd1a80cba040663b16b53a9e78db1b24223167ca57aefb","schema_version":"1.0","event_id":"sha256:442012220d2d0cff96bd1a80cba040663b16b53a9e78db1b24223167ca57aefb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RFUGBEQ7OZGLEXLGJF463MKZXV/bundle.json","state_url":"https://pith.science/pith/RFUGBEQ7OZGLEXLGJF463MKZXV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RFUGBEQ7OZGLEXLGJF463MKZXV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T05:03:34Z","links":{"resolver":"https://pith.science/pith/RFUGBEQ7OZGLEXLGJF463MKZXV","bundle":"https://pith.science/pith/RFUGBEQ7OZGLEXLGJF463MKZXV/bundle.json","state":"https://pith.science/pith/RFUGBEQ7OZGLEXLGJF463MKZXV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RFUGBEQ7OZGLEXLGJF463MKZXV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:RFUGBEQ7OZGLEXLGJF463MKZXV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6eb46261c5a75ab61417cf7088fa89fae70f4503c55599eec39cbfe306c9d0c3","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-17T17:59:13Z","title_canon_sha256":"e8430dc219f0831c8b519749c2d973af1c5c963132235edf34183100864f401f"},"schema_version":"1.0","source":{"id":"1809.06364","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.06364","created_at":"2026-05-18T00:05:34Z"},{"alias_kind":"arxiv_version","alias_value":"1809.06364v1","created_at":"2026-05-18T00:05:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.06364","created_at":"2026-05-18T00:05:34Z"},{"alias_kind":"pith_short_12","alias_value":"RFUGBEQ7OZGL","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"RFUGBEQ7OZGLEXLG","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"RFUGBEQ7","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:442012220d2d0cff96bd1a80cba040663b16b53a9e78db1b24223167ca57aefb","target":"graph","created_at":"2026-05-18T00:05:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many reinforcement-learning researchers treat the reward function as a part of the environment, meaning that the agent can only know the reward of a state if it encounters that state in a trial run. However, we argue that this is an unnecessary limitation and instead, the reward function should be provided to the learning algorithm. The advantage is that the algorithm can then use the reward function to check the reward for states that the agent hasn't even encountered yet. In addition, the algorithm can simultaneously learn policies for multiple reward functions. For each state, the algorithm","authors_text":"Eli Friedman, Fred Fontaine","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-17T17:59:13Z","title":"Generalizing Across Multi-Objective Reward Functions in Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.06364","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9b721f4cccefd4f7b8a95d5d58cd93c26d07c613d31aa2f18b925db6d0fad0ed","target":"record","created_at":"2026-05-18T00:05:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6eb46261c5a75ab61417cf7088fa89fae70f4503c55599eec39cbfe306c9d0c3","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-17T17:59:13Z","title_canon_sha256":"e8430dc219f0831c8b519749c2d973af1c5c963132235edf34183100864f401f"},"schema_version":"1.0","source":{"id":"1809.06364","kind":"arxiv","version":1}},"canonical_sha256":"896860921f764cb25d664979edb159bd68d6188f99245ec577809b45b00d0f6c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"896860921f764cb25d664979edb159bd68d6188f99245ec577809b45b00d0f6c","first_computed_at":"2026-05-18T00:05:34.913100Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:05:34.913100Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"16Qxt30QP1RNs9177POqDr4uMSdi5akEFXaC78ulTYTelAuTg5QlY5m+iGHQn+kwivrqQMtESHumZWYSoULmAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:05:34.913776Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.06364","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9b721f4cccefd4f7b8a95d5d58cd93c26d07c613d31aa2f18b925db6d0fad0ed","sha256:442012220d2d0cff96bd1a80cba040663b16b53a9e78db1b24223167ca57aefb"],"state_sha256":"3fc816bec19e0245037a3c3a9614e4fa755a017471e63010a6d9fbe2dd1bfb81"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"y+9mQi9v3CCoKbGv+kviCW8HkKYN5LlglfE3iw9YaMf2x5EutEsVgIerd5DGKUh9fF2i+09AAA6mIU1nUCKGDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T05:03:34.543853Z","bundle_sha256":"6c72ac0280532b352ef0c5ada0094fe84f68f3c56e317f9438f5f3f8d2d6bd85"}}