{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:LQ5OYQULFIXYZOG6V75FM4XY6Q","short_pith_number":"pith:LQ5OYQUL","canonical_record":{"source":{"id":"1205.4810","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-05-22T06:02:09Z","cross_cats_sorted":[],"title_canon_sha256":"2d4968b12c7d32b3901abd1d89456212df84466e8559ea0fa3f6dbfbe1c4c5b2","abstract_canon_sha256":"aaa39624845c806a106ee6f3c5a67557c66fb7a2291e6b775849ad439f9f5883"},"schema_version":"1.0"},"canonical_sha256":"5c3aec428b2a2f8cb8deaffa5672f8f428a8ba2003c578d6f863a73efa1b166a","source":{"kind":"arxiv","id":"1205.4810","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1205.4810","created_at":"2026-05-18T03:51:35Z"},{"alias_kind":"arxiv_version","alias_value":"1205.4810v3","created_at":"2026-05-18T03:51:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1205.4810","created_at":"2026-05-18T03:51:35Z"},{"alias_kind":"pith_short_12","alias_value":"LQ5OYQULFIXY","created_at":"2026-05-18T12:27:14Z"},{"alias_kind":"pith_short_16","alias_value":"LQ5OYQULFIXYZOG6","created_at":"2026-05-18T12:27:14Z"},{"alias_kind":"pith_short_8","alias_value":"LQ5OYQUL","created_at":"2026-05-18T12:27:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:LQ5OYQULFIXYZOG6V75FM4XY6Q","target":"record","payload":{"canonical_record":{"source":{"id":"1205.4810","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-05-22T06:02:09Z","cross_cats_sorted":[],"title_canon_sha256":"2d4968b12c7d32b3901abd1d89456212df84466e8559ea0fa3f6dbfbe1c4c5b2","abstract_canon_sha256":"aaa39624845c806a106ee6f3c5a67557c66fb7a2291e6b775849ad439f9f5883"},"schema_version":"1.0"},"canonical_sha256":"5c3aec428b2a2f8cb8deaffa5672f8f428a8ba2003c578d6f863a73efa1b166a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:51:35.085530Z","signature_b64":"htJ2IucxJbRgXo2AmbOus5Up2aiwJvvfsGg05uNwuLMuPflDyNCxWRy6nr7OXJ+S0W+SLFADJPb2ETkftnYGBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5c3aec428b2a2f8cb8deaffa5672f8f428a8ba2003c578d6f863a73efa1b166a","last_reissued_at":"2026-05-18T03:51:35.084614Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:51:35.084614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1205.4810","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:51:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"eApcxMeXvs1mfE1XTGmo7xqLspq1bBPqryEGSSvkC/JO4VF4bw+//3t9DzXp6hXVxi/iNqG+hZqGaRWKpBLdCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T23:47:53.854966Z"},"content_sha256":"882c07d0a3f5e92332aee42a8ce73ddae08d3a63df6e511ba4641bb4fb4d9389","schema_version":"1.0","event_id":"sha256:882c07d0a3f5e92332aee42a8ce73ddae08d3a63df6e511ba4641bb4fb4d9389"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:LQ5OYQULFIXYZOG6V75FM4XY6Q","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safe Exploration in Markov Decision Processes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Pieter Abbeel, Teodor Mihai Moldovan","submitted_at":"2012-05-22T06:02:09Z","abstract_excerpt":"In environments with uncertain dynamics exploration is necessary to learn how to perform well. Existing reinforcement learning algorithms provide strong exploration guarantees, but they tend to rely on an ergodicity assumption. The essence of ergodicity is that any state is eventually reachable from any other state by following a suitable policy. This assumption allows for exploration algorithms that operate by simply favoring states that have rarely been visited before. For most physical systems this assumption is impractical as the systems would break before any reasonable exploration has ta"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1205.4810","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:51:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5QgvECymYuR1SZDL9I5cN3tNbV3r9/9UVLh0jUwnFtzzdXd+ovQbABEAdANpElksSx5qcN6xKwKEZktGr7LEAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T23:47:53.855653Z"},"content_sha256":"cb50bfae2712c154d23a35241ce4a4ce5f04d6e381244406c6dcefd84199cef0","schema_version":"1.0","event_id":"sha256:cb50bfae2712c154d23a35241ce4a4ce5f04d6e381244406c6dcefd84199cef0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q/bundle.json","state_url":"https://pith.science/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T23:47:53Z","links":{"resolver":"https://pith.science/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q","bundle":"https://pith.science/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q/bundle.json","state":"https://pith.science/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LQ5OYQULFIXYZOG6V75FM4XY6Q/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:LQ5OYQULFIXYZOG6V75FM4XY6Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"aaa39624845c806a106ee6f3c5a67557c66fb7a2291e6b775849ad439f9f5883","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-05-22T06:02:09Z","title_canon_sha256":"2d4968b12c7d32b3901abd1d89456212df84466e8559ea0fa3f6dbfbe1c4c5b2"},"schema_version":"1.0","source":{"id":"1205.4810","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1205.4810","created_at":"2026-05-18T03:51:35Z"},{"alias_kind":"arxiv_version","alias_value":"1205.4810v3","created_at":"2026-05-18T03:51:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1205.4810","created_at":"2026-05-18T03:51:35Z"},{"alias_kind":"pith_short_12","alias_value":"LQ5OYQULFIXY","created_at":"2026-05-18T12:27:14Z"},{"alias_kind":"pith_short_16","alias_value":"LQ5OYQULFIXYZOG6","created_at":"2026-05-18T12:27:14Z"},{"alias_kind":"pith_short_8","alias_value":"LQ5OYQUL","created_at":"2026-05-18T12:27:14Z"}],"graph_snapshots":[{"event_id":"sha256:cb50bfae2712c154d23a35241ce4a4ce5f04d6e381244406c6dcefd84199cef0","target":"graph","created_at":"2026-05-18T03:51:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In environments with uncertain dynamics exploration is necessary to learn how to perform well. Existing reinforcement learning algorithms provide strong exploration guarantees, but they tend to rely on an ergodicity assumption. The essence of ergodicity is that any state is eventually reachable from any other state by following a suitable policy. This assumption allows for exploration algorithms that operate by simply favoring states that have rarely been visited before. For most physical systems this assumption is impractical as the systems would break before any reasonable exploration has ta","authors_text":"Pieter Abbeel, Teodor Mihai Moldovan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-05-22T06:02:09Z","title":"Safe Exploration in Markov Decision Processes"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1205.4810","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:882c07d0a3f5e92332aee42a8ce73ddae08d3a63df6e511ba4641bb4fb4d9389","target":"record","created_at":"2026-05-18T03:51:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"aaa39624845c806a106ee6f3c5a67557c66fb7a2291e6b775849ad439f9f5883","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-05-22T06:02:09Z","title_canon_sha256":"2d4968b12c7d32b3901abd1d89456212df84466e8559ea0fa3f6dbfbe1c4c5b2"},"schema_version":"1.0","source":{"id":"1205.4810","kind":"arxiv","version":3}},"canonical_sha256":"5c3aec428b2a2f8cb8deaffa5672f8f428a8ba2003c578d6f863a73efa1b166a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5c3aec428b2a2f8cb8deaffa5672f8f428a8ba2003c578d6f863a73efa1b166a","first_computed_at":"2026-05-18T03:51:35.084614Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:51:35.084614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"htJ2IucxJbRgXo2AmbOus5Up2aiwJvvfsGg05uNwuLMuPflDyNCxWRy6nr7OXJ+S0W+SLFADJPb2ETkftnYGBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:51:35.085530Z","signed_message":"canonical_sha256_bytes"},"source_id":"1205.4810","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:882c07d0a3f5e92332aee42a8ce73ddae08d3a63df6e511ba4641bb4fb4d9389","sha256:cb50bfae2712c154d23a35241ce4a4ce5f04d6e381244406c6dcefd84199cef0"],"state_sha256":"129d1f9b8de0332cf45318abd60873366860b5464a38bc96cf2e64413bec32d9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WQKiYENpAjSVb881+qV1gulCjQaw/6rvc1Fr6AAK8j0PhVsFn8/9gQJR+M1iVZNL+bm3sEdwMnOU4pdZCMd0Aw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T23:47:53.861033Z","bundle_sha256":"9c5807bbc081074992fe47d186f6ef44558cf22e2a95776dbe8047644c8e5a99"}}