{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:MTPKMC6BS4VLSEJ32W5EWE7DMG","short_pith_number":"pith:MTPKMC6B","canonical_record":{"source":{"id":"1601.06569","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-01-25T11:50:43Z","cross_cats_sorted":[],"title_canon_sha256":"38cd570c6c0e118601a213de442f42fb9f44801614e2ddcceba6e8016e9e5e92","abstract_canon_sha256":"a910a4d043c169551c57f5d0fa253f502b9f0b2ccfc9f8834c787999ac23986e"},"schema_version":"1.0"},"canonical_sha256":"64dea60bc1972ab9113bd5ba4b13e3619c6a62507fce558b0fdb47933709ac87","source":{"kind":"arxiv","id":"1601.06569","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1601.06569","created_at":"2026-05-18T01:22:04Z"},{"alias_kind":"arxiv_version","alias_value":"1601.06569v1","created_at":"2026-05-18T01:22:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1601.06569","created_at":"2026-05-18T01:22:04Z"},{"alias_kind":"pith_short_12","alias_value":"MTPKMC6BS4VL","created_at":"2026-05-18T12:30:32Z"},{"alias_kind":"pith_short_16","alias_value":"MTPKMC6BS4VLSEJ3","created_at":"2026-05-18T12:30:32Z"},{"alias_kind":"pith_short_8","alias_value":"MTPKMC6B","created_at":"2026-05-18T12:30:32Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:MTPKMC6BS4VLSEJ32W5EWE7DMG","target":"record","payload":{"canonical_record":{"source":{"id":"1601.06569","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-01-25T11:50:43Z","cross_cats_sorted":[],"title_canon_sha256":"38cd570c6c0e118601a213de442f42fb9f44801614e2ddcceba6e8016e9e5e92","abstract_canon_sha256":"a910a4d043c169551c57f5d0fa253f502b9f0b2ccfc9f8834c787999ac23986e"},"schema_version":"1.0"},"canonical_sha256":"64dea60bc1972ab9113bd5ba4b13e3619c6a62507fce558b0fdb47933709ac87","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:22:04.205582Z","signature_b64":"s2rSqoOzyQ399PeCAiGN5cGeQXcD25RWFh23M/QABzt5zJ3j9O7ab6dC+Mv0Y7VQfjhf//9xSNFNDavMxVdNDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"64dea60bc1972ab9113bd5ba4b13e3619c6a62507fce558b0fdb47933709ac87","last_reissued_at":"2026-05-18T01:22:04.205040Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:22:04.205040Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1601.06569","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:22:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Vg6Q1g8MfhiJu5aA34bnBkxk4DpYIzC5uOtKkuSbGAkc3f1fWV1uedJezdDHwywRfZf9qN8ok8H6ZBFyzj58Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T09:02:19.484683Z"},"content_sha256":"622f70047081e76446e92e125b927a4c4dc34c551858d6ce75a7fa3983016eb9","schema_version":"1.0","event_id":"sha256:622f70047081e76446e92e125b927a4c4dc34c551858d6ce75a7fa3983016eb9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:MTPKMC6BS4VLSEJ32W5EWE7DMG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards Resolving Unidentifiability in Inverse Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Kareem Amin, Satinder Singh","submitted_at":"2016-01-25T11:50:43Z","abstract_excerpt":"We consider a setting for Inverse Reinforcement Learning (IRL) where the learner is extended with the ability to actively select multiple environments, observing an agent's behavior on each environment. We first demonstrate that if the learner can experiment with any transition dynamics on some fixed set of states and actions, then there exists an algorithm that reconstructs the agent's reward function to the fullest extent theoretically possible, and that requires only a small (logarithmic) number of experiments. We contrast this result to what is known about IRL in single fixed environments,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1601.06569","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:22:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"B9vlQKqSAaVgqoCUCyOQ7RbqirBF1UGIY+ZWh5MMirdfPk3NTvkqyDnei/gmx/SSzK8FxV/SK685d/N54WZBBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T09:02:19.485362Z"},"content_sha256":"961f0d7281f9845bc615688b88516e7ceb5c5e730e8b73ea39e50ce4aacf9e81","schema_version":"1.0","event_id":"sha256:961f0d7281f9845bc615688b88516e7ceb5c5e730e8b73ea39e50ce4aacf9e81"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG/bundle.json","state_url":"https://pith.science/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T09:02:19Z","links":{"resolver":"https://pith.science/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG","bundle":"https://pith.science/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG/bundle.json","state":"https://pith.science/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MTPKMC6BS4VLSEJ32W5EWE7DMG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:MTPKMC6BS4VLSEJ32W5EWE7DMG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a910a4d043c169551c57f5d0fa253f502b9f0b2ccfc9f8834c787999ac23986e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-01-25T11:50:43Z","title_canon_sha256":"38cd570c6c0e118601a213de442f42fb9f44801614e2ddcceba6e8016e9e5e92"},"schema_version":"1.0","source":{"id":"1601.06569","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1601.06569","created_at":"2026-05-18T01:22:04Z"},{"alias_kind":"arxiv_version","alias_value":"1601.06569v1","created_at":"2026-05-18T01:22:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1601.06569","created_at":"2026-05-18T01:22:04Z"},{"alias_kind":"pith_short_12","alias_value":"MTPKMC6BS4VL","created_at":"2026-05-18T12:30:32Z"},{"alias_kind":"pith_short_16","alias_value":"MTPKMC6BS4VLSEJ3","created_at":"2026-05-18T12:30:32Z"},{"alias_kind":"pith_short_8","alias_value":"MTPKMC6B","created_at":"2026-05-18T12:30:32Z"}],"graph_snapshots":[{"event_id":"sha256:961f0d7281f9845bc615688b88516e7ceb5c5e730e8b73ea39e50ce4aacf9e81","target":"graph","created_at":"2026-05-18T01:22:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider a setting for Inverse Reinforcement Learning (IRL) where the learner is extended with the ability to actively select multiple environments, observing an agent's behavior on each environment. We first demonstrate that if the learner can experiment with any transition dynamics on some fixed set of states and actions, then there exists an algorithm that reconstructs the agent's reward function to the fullest extent theoretically possible, and that requires only a small (logarithmic) number of experiments. We contrast this result to what is known about IRL in single fixed environments,","authors_text":"Kareem Amin, Satinder Singh","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-01-25T11:50:43Z","title":"Towards Resolving Unidentifiability in Inverse Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1601.06569","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:622f70047081e76446e92e125b927a4c4dc34c551858d6ce75a7fa3983016eb9","target":"record","created_at":"2026-05-18T01:22:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a910a4d043c169551c57f5d0fa253f502b9f0b2ccfc9f8834c787999ac23986e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-01-25T11:50:43Z","title_canon_sha256":"38cd570c6c0e118601a213de442f42fb9f44801614e2ddcceba6e8016e9e5e92"},"schema_version":"1.0","source":{"id":"1601.06569","kind":"arxiv","version":1}},"canonical_sha256":"64dea60bc1972ab9113bd5ba4b13e3619c6a62507fce558b0fdb47933709ac87","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"64dea60bc1972ab9113bd5ba4b13e3619c6a62507fce558b0fdb47933709ac87","first_computed_at":"2026-05-18T01:22:04.205040Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:22:04.205040Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"s2rSqoOzyQ399PeCAiGN5cGeQXcD25RWFh23M/QABzt5zJ3j9O7ab6dC+Mv0Y7VQfjhf//9xSNFNDavMxVdNDg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:22:04.205582Z","signed_message":"canonical_sha256_bytes"},"source_id":"1601.06569","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:622f70047081e76446e92e125b927a4c4dc34c551858d6ce75a7fa3983016eb9","sha256:961f0d7281f9845bc615688b88516e7ceb5c5e730e8b73ea39e50ce4aacf9e81"],"state_sha256":"803760dd355b23ff0d63173f58a2af0078c41def917f2b2a1d7ae7e76367e337"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rIs9acRdq9pOt89VsM3/wahcWXUupxhRkjU56CeIp3ZYHPHAu5XXtiwE/GdFR1OKZ674DKf5yFpA8UVR9GUYDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T09:02:19.489612Z","bundle_sha256":"67211cd97600f557efb92ec9917e872649c30e28e157a3af66223e06c52232ed"}}