{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:ZX3SQZTGVZA35RVLV7TRR7ID7N","short_pith_number":"pith:ZX3SQZTG","canonical_record":{"source":{"id":"1704.02882","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-04-10T14:38:37Z","cross_cats_sorted":["cs.LG","cs.MA","stat.ML"],"title_canon_sha256":"5fa26d70e0a965a6926eead9d3b77b7f1717b536700210074761abb8f7794061","abstract_canon_sha256":"a940887c5297b4158aaf013f45de33459739bd0f030e6baca1fe6c180edd7225"},"schema_version":"1.0"},"canonical_sha256":"cdf7286666ae41bec6abafe718fd03fb6b9d1b74a3e8b04c414e9afee78f2f51","source":{"kind":"arxiv","id":"1704.02882","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.02882","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"arxiv_version","alias_value":"1704.02882v2","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.02882","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"pith_short_12","alias_value":"ZX3SQZTGVZA3","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZX3SQZTGVZA35RVL","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZX3SQZTG","created_at":"2026-05-18T12:31:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:ZX3SQZTGVZA35RVLV7TRR7ID7N","target":"record","payload":{"canonical_record":{"source":{"id":"1704.02882","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-04-10T14:38:37Z","cross_cats_sorted":["cs.LG","cs.MA","stat.ML"],"title_canon_sha256":"5fa26d70e0a965a6926eead9d3b77b7f1717b536700210074761abb8f7794061","abstract_canon_sha256":"a940887c5297b4158aaf013f45de33459739bd0f030e6baca1fe6c180edd7225"},"schema_version":"1.0"},"canonical_sha256":"cdf7286666ae41bec6abafe718fd03fb6b9d1b74a3e8b04c414e9afee78f2f51","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:44:08.669830Z","signature_b64":"Ov5FP4fA3gtL5Wp9C5b6sxSbFOwkF2YFFLXnmDiBkKoF+PEbq0zqCv9ujTlyT/O3J/NemwHx6maiOPca+NoLDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cdf7286666ae41bec6abafe718fd03fb6b9d1b74a3e8b04c414e9afee78f2f51","last_reissued_at":"2026-05-18T00:44:08.669371Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:44:08.669371Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1704.02882","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:44:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JtQMxEBukCcsGTzGSzETXAYZCOXMJWY5HvBGlf7Som7n9qRUUfpVgovZpFoPFNkU9jRSKxLJwpXuOmzY9Wy9Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T09:15:11.961928Z"},"content_sha256":"7b96a0769779285597769ea5a9d641426548e3b93fb5d770a2a0581203f265e8","schema_version":"1.0","event_id":"sha256:7b96a0769779285597769ea5a9d641426548e3b93fb5d770a2a0581203f265e8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:ZX3SQZTGVZA35RVLV7TRR7ID7N","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Dynamic Safe Interruptibility for Decentralized Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.MA","stat.ML"],"primary_cat":"cs.AI","authors_text":"Alexandre Maurer, El Mahdi El Mhamdi, Hadrien Hendrikx, Rachid Guerraoui","submitted_at":"2017-04-10T14:38:37Z","abstract_excerpt":"In reinforcement learning, agents learn by performing actions and observing their outcomes. Sometimes, it is desirable for a human operator to \\textit{interrupt} an agent in order to prevent dangerous situations from happening. Yet, as part of their learning process, agents may link these interruptions, that impact their reward, to specific states and deliberately avoid them. The situation is particularly challenging in a multi-agent context because agents might not only learn from their own past interruptions, but also from those of other agents. Orseau and Armstrong defined \\emph{safe interr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.02882","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:44:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DxQUbq8qiqyMpAoUcODvCHHadwpNAH54UHphhJYZSULCvMqdXLzyOKFKe6+4lahsf4iZCqeS5sUGpY3YNRUbAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T09:15:11.962269Z"},"content_sha256":"db024556f24f1d209aa8e6dfc2c92fe4e7990c3a396bd483793c00782e9e2e9f","schema_version":"1.0","event_id":"sha256:db024556f24f1d209aa8e6dfc2c92fe4e7990c3a396bd483793c00782e9e2e9f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N/bundle.json","state_url":"https://pith.science/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T09:15:11Z","links":{"resolver":"https://pith.science/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N","bundle":"https://pith.science/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N/bundle.json","state":"https://pith.science/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZX3SQZTGVZA35RVLV7TRR7ID7N/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ZX3SQZTGVZA35RVLV7TRR7ID7N","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a940887c5297b4158aaf013f45de33459739bd0f030e6baca1fe6c180edd7225","cross_cats_sorted":["cs.LG","cs.MA","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-04-10T14:38:37Z","title_canon_sha256":"5fa26d70e0a965a6926eead9d3b77b7f1717b536700210074761abb8f7794061"},"schema_version":"1.0","source":{"id":"1704.02882","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.02882","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"arxiv_version","alias_value":"1704.02882v2","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.02882","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"pith_short_12","alias_value":"ZX3SQZTGVZA3","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZX3SQZTGVZA35RVL","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZX3SQZTG","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:db024556f24f1d209aa8e6dfc2c92fe4e7990c3a396bd483793c00782e9e2e9f","target":"graph","created_at":"2026-05-18T00:44:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In reinforcement learning, agents learn by performing actions and observing their outcomes. Sometimes, it is desirable for a human operator to \\textit{interrupt} an agent in order to prevent dangerous situations from happening. Yet, as part of their learning process, agents may link these interruptions, that impact their reward, to specific states and deliberately avoid them. The situation is particularly challenging in a multi-agent context because agents might not only learn from their own past interruptions, but also from those of other agents. Orseau and Armstrong defined \\emph{safe interr","authors_text":"Alexandre Maurer, El Mahdi El Mhamdi, Hadrien Hendrikx, Rachid Guerraoui","cross_cats":["cs.LG","cs.MA","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-04-10T14:38:37Z","title":"Dynamic Safe Interruptibility for Decentralized Multi-Agent Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.02882","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7b96a0769779285597769ea5a9d641426548e3b93fb5d770a2a0581203f265e8","target":"record","created_at":"2026-05-18T00:44:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a940887c5297b4158aaf013f45de33459739bd0f030e6baca1fe6c180edd7225","cross_cats_sorted":["cs.LG","cs.MA","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-04-10T14:38:37Z","title_canon_sha256":"5fa26d70e0a965a6926eead9d3b77b7f1717b536700210074761abb8f7794061"},"schema_version":"1.0","source":{"id":"1704.02882","kind":"arxiv","version":2}},"canonical_sha256":"cdf7286666ae41bec6abafe718fd03fb6b9d1b74a3e8b04c414e9afee78f2f51","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cdf7286666ae41bec6abafe718fd03fb6b9d1b74a3e8b04c414e9afee78f2f51","first_computed_at":"2026-05-18T00:44:08.669371Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:44:08.669371Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Ov5FP4fA3gtL5Wp9C5b6sxSbFOwkF2YFFLXnmDiBkKoF+PEbq0zqCv9ujTlyT/O3J/NemwHx6maiOPca+NoLDw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:44:08.669830Z","signed_message":"canonical_sha256_bytes"},"source_id":"1704.02882","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7b96a0769779285597769ea5a9d641426548e3b93fb5d770a2a0581203f265e8","sha256:db024556f24f1d209aa8e6dfc2c92fe4e7990c3a396bd483793c00782e9e2e9f"],"state_sha256":"a1fceab8b09502faf2de658d19bb9c4090d121a37ee6144998e3ff419585de84"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T6KfeWyhwmjwdwLcij7CfcDu2Tp8o9lVIuDxBywEDXRzFj8EwlFvQM7WYXNrt/jgEinG6diZ4So405SgcY2ZBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T09:15:11.964344Z","bundle_sha256":"afb9c8446f8430f56f22d55008ecc7b71de35ec8247689e788d8fdaee4f3ed6b"}}