{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:IZ47D2MXJLTS745ZTETUNR4IBJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ba1569a61c2aae7f81a0403b3280ef9f96d279ef5370fcf7cb95e66eedfd63ce","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-06-07T03:49:05Z","title_canon_sha256":"6379560c9eed767c948dff2bd1c1a5ade211bb7dc2030a4bf8b19a08edc0195c"},"schema_version":"1.0","source":{"id":"1806.02501","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.02501","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"arxiv_version","alias_value":"1806.02501v1","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.02501","created_at":"2026-05-18T00:13:56Z"},{"alias_kind":"pith_short_12","alias_value":"IZ47D2MXJLTS","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"IZ47D2MXJLTS745Z","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"IZ47D2MX","created_at":"2026-05-18T12:32:31Z"}],"graph_snapshots":[{"event_id":"sha256:f7c594844c3c4799593ccb6ffcffb64d1557c59663de2cf9222f3d1d672aa8be","target":"graph","created_at":"2026-05-18T00:13:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Designing a good reward function is essential to robot planning and reinforcement learning, but it can also be challenging and frustrating. The reward needs to work across multiple different environments, and that often requires many iterations of tuning. We introduce a novel divide-and-conquer approach that enables the designer to specify a reward separately for each environment. By treating these separate reward functions as observations about the underlying true reward, we derive an approach to infer a common reward across all environments. We conduct user studies in an abstract grid world ","authors_text":"Anca D. Dragan, Dylan Hadfield-Menell, Ellis Ratner","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-06-07T03:49:05Z","title":"Simplifying Reward Design through Divide-and-Conquer"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.02501","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ac6276f1a0d7198c20e9b4f26eb8932356aade2eeac9929d473b29b1907ec60d","target":"record","created_at":"2026-05-18T00:13:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ba1569a61c2aae7f81a0403b3280ef9f96d279ef5370fcf7cb95e66eedfd63ce","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-06-07T03:49:05Z","title_canon_sha256":"6379560c9eed767c948dff2bd1c1a5ade211bb7dc2030a4bf8b19a08edc0195c"},"schema_version":"1.0","source":{"id":"1806.02501","kind":"arxiv","version":1}},"canonical_sha256":"4679f1e9974ae72ff3b9992746c7880a4205096596fb34d601f68dcb52c015c4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4679f1e9974ae72ff3b9992746c7880a4205096596fb34d601f68dcb52c015c4","first_computed_at":"2026-05-18T00:13:56.608704Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:56.608704Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"f0MiY00nYLAbIECINHEMEdgASLiQI4mLvns1cRJRK9fdEbR7i9ysimwUF019OUTVujqHCq5bcOjubUel/3eSAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:56.609435Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.02501","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ac6276f1a0d7198c20e9b4f26eb8932356aade2eeac9929d473b29b1907ec60d","sha256:f7c594844c3c4799593ccb6ffcffb64d1557c59663de2cf9222f3d1d672aa8be"],"state_sha256":"488898625928a0d4b0cded23f6998c8fdb74be65bb8c024ea173bb25ac4c786a"}