{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:37P2JVELGM6JGSCV5E6DH2CUMN","short_pith_number":"pith:37P2JVEL","canonical_record":{"source":{"id":"1809.08343","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-21T23:38:17Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"e6226f33f0689d705f7f9082044571e4fffd71cedfb275bbbd51d14d1cfeea16","abstract_canon_sha256":"8c48823bb70283f817c739ddd49bdc68a457d019200319f0e09993f29e34e0cb"},"schema_version":"1.0"},"canonical_sha256":"dfdfa4d48b333c934855e93c33e85463451a789d980c641f91bb1fb3855c916c","source":{"kind":"arxiv","id":"1809.08343","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.08343","created_at":"2026-05-18T00:05:08Z"},{"alias_kind":"arxiv_version","alias_value":"1809.08343v1","created_at":"2026-05-18T00:05:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.08343","created_at":"2026-05-18T00:05:08Z"},{"alias_kind":"pith_short_12","alias_value":"37P2JVELGM6J","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"37P2JVELGM6JGSCV","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"37P2JVEL","created_at":"2026-05-18T12:32:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:37P2JVELGM6JGSCV5E6DH2CUMN","target":"record","payload":{"canonical_record":{"source":{"id":"1809.08343","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-21T23:38:17Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"e6226f33f0689d705f7f9082044571e4fffd71cedfb275bbbd51d14d1cfeea16","abstract_canon_sha256":"8c48823bb70283f817c739ddd49bdc68a457d019200319f0e09993f29e34e0cb"},"schema_version":"1.0"},"canonical_sha256":"dfdfa4d48b333c934855e93c33e85463451a789d980c641f91bb1fb3855c916c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:05:08.743881Z","signature_b64":"DvqDkzVoq8LkOHb8dFRj/39n3u9CI5wy8r6IlqXU5C3Ci9kMIYmgcLCT2rt6yZU+2uY9hddVicRcb/B11EsgAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dfdfa4d48b333c934855e93c33e85463451a789d980c641f91bb1fb3855c916c","last_reissued_at":"2026-05-18T00:05:08.743291Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:05:08.743291Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.08343","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:05:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vG6UT/HV28l08+oYuFh+faLbScwRtY+hNsuvpmZ6jMWnNAkikZLUzHUwGbKMUXWWuk/o1Ye1xwj8po/kVlLjAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T20:05:36.037972Z"},"content_sha256":"85ca9666782fc014c3764a0d7f4a890750fe33ef06b37a181219fdf5437d0175","schema_version":"1.0","event_id":"sha256:85ca9666782fc014c3764a0d7f4a890750fe33ef06b37a181219fdf5437d0175"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:37P2JVELGM6JGSCV5E6DH2CUMN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Interpretable Multi-Objective Reinforcement Learning through Policy Orchestration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Djallel Bouneffouf, Francesca Rossi, Kush Varshney, Moninder Singh, Murray Campbell, Nicholas Mattei, Piyush Madan, Rachita Chandra, Ritesh Noothigattu","submitted_at":"2018-09-21T23:38:17Z","abstract_excerpt":"Autonomous cyber-physical agents and systems play an increasingly large role in our lives. To ensure that agents behave in ways aligned with the values of the societies in which they operate, we must develop techniques that allow these agents to not only maximize their reward in an environment, but also to learn and follow the implicit constraints of society. These constraints and norms can come from any number of sources including regulations, business process guidelines, laws, ethical principles, social norms, and moral values. We detail a novel approach that uses inverse reinforcement learn"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.08343","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:05:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RBCqDbvhTXVKnmHr3Nfhs20ztbiu+BEXGgwaOBt/7OkK/xjP4IrjEcZmZtvHUUl0Opr8H8tC6GI0LLHigLSLBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T20:05:36.038849Z"},"content_sha256":"74a6d9c1bbc4ff526a0f2dfd19b10b8e814eca0f7a9a6a0e3d46210799214625","schema_version":"1.0","event_id":"sha256:74a6d9c1bbc4ff526a0f2dfd19b10b8e814eca0f7a9a6a0e3d46210799214625"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/37P2JVELGM6JGSCV5E6DH2CUMN/bundle.json","state_url":"https://pith.science/pith/37P2JVELGM6JGSCV5E6DH2CUMN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/37P2JVELGM6JGSCV5E6DH2CUMN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T20:05:36Z","links":{"resolver":"https://pith.science/pith/37P2JVELGM6JGSCV5E6DH2CUMN","bundle":"https://pith.science/pith/37P2JVELGM6JGSCV5E6DH2CUMN/bundle.json","state":"https://pith.science/pith/37P2JVELGM6JGSCV5E6DH2CUMN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/37P2JVELGM6JGSCV5E6DH2CUMN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:37P2JVELGM6JGSCV5E6DH2CUMN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8c48823bb70283f817c739ddd49bdc68a457d019200319f0e09993f29e34e0cb","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-21T23:38:17Z","title_canon_sha256":"e6226f33f0689d705f7f9082044571e4fffd71cedfb275bbbd51d14d1cfeea16"},"schema_version":"1.0","source":{"id":"1809.08343","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.08343","created_at":"2026-05-18T00:05:08Z"},{"alias_kind":"arxiv_version","alias_value":"1809.08343v1","created_at":"2026-05-18T00:05:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.08343","created_at":"2026-05-18T00:05:08Z"},{"alias_kind":"pith_short_12","alias_value":"37P2JVELGM6J","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"37P2JVELGM6JGSCV","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"37P2JVEL","created_at":"2026-05-18T12:32:02Z"}],"graph_snapshots":[{"event_id":"sha256:74a6d9c1bbc4ff526a0f2dfd19b10b8e814eca0f7a9a6a0e3d46210799214625","target":"graph","created_at":"2026-05-18T00:05:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Autonomous cyber-physical agents and systems play an increasingly large role in our lives. To ensure that agents behave in ways aligned with the values of the societies in which they operate, we must develop techniques that allow these agents to not only maximize their reward in an environment, but also to learn and follow the implicit constraints of society. These constraints and norms can come from any number of sources including regulations, business process guidelines, laws, ethical principles, social norms, and moral values. We detail a novel approach that uses inverse reinforcement learn","authors_text":"Djallel Bouneffouf, Francesca Rossi, Kush Varshney, Moninder Singh, Murray Campbell, Nicholas Mattei, Piyush Madan, Rachita Chandra, Ritesh Noothigattu","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-21T23:38:17Z","title":"Interpretable Multi-Objective Reinforcement Learning through Policy Orchestration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.08343","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:85ca9666782fc014c3764a0d7f4a890750fe33ef06b37a181219fdf5437d0175","target":"record","created_at":"2026-05-18T00:05:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8c48823bb70283f817c739ddd49bdc68a457d019200319f0e09993f29e34e0cb","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-21T23:38:17Z","title_canon_sha256":"e6226f33f0689d705f7f9082044571e4fffd71cedfb275bbbd51d14d1cfeea16"},"schema_version":"1.0","source":{"id":"1809.08343","kind":"arxiv","version":1}},"canonical_sha256":"dfdfa4d48b333c934855e93c33e85463451a789d980c641f91bb1fb3855c916c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dfdfa4d48b333c934855e93c33e85463451a789d980c641f91bb1fb3855c916c","first_computed_at":"2026-05-18T00:05:08.743291Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:05:08.743291Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DvqDkzVoq8LkOHb8dFRj/39n3u9CI5wy8r6IlqXU5C3Ci9kMIYmgcLCT2rt6yZU+2uY9hddVicRcb/B11EsgAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:05:08.743881Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.08343","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:85ca9666782fc014c3764a0d7f4a890750fe33ef06b37a181219fdf5437d0175","sha256:74a6d9c1bbc4ff526a0f2dfd19b10b8e814eca0f7a9a6a0e3d46210799214625"],"state_sha256":"c16cb425305a54ab9d7058cbbe7f51f832f02738668ab7daedd0badf8233520f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gp7tQu4XvNgOsPsEqn0vNx+Oaoli+RMnu0WJ27PYke7WDFejSgmGnsviGAvcNzbWPkWDdmNbdU8P8HWDr0kVAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T20:05:36.043258Z","bundle_sha256":"e7eb8464d7af9a6afc8f7eac43af1a3fcbeeb9af5b419417a04ba2ef90975622"}}