{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2020:V6VVWLWXPT5TU5CJCDKBJIQFWA","short_pith_number":"pith:V6VVWLWX","canonical_record":{"source":{"id":"2003.00534","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-03-01T17:47:03Z","cross_cats_sorted":["math.OC","stat.ML"],"title_canon_sha256":"36c9549200676df8d0fcb355fe445fc370b7d9d189783374e126f9ae5224ff4c","abstract_canon_sha256":"91345aa064e20c5d7e0be912490430175cd3a0c2861729ad5185cc2431fd6cb6"},"schema_version":"1.0"},"canonical_sha256":"afab5b2ed77cfb3a744910d414a205b037e971fc50ab17042ccbbb5a2cda0148","source":{"kind":"arxiv","id":"2003.00534","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2003.00534","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"arxiv_version","alias_value":"2003.00534v2","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2003.00534","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"pith_short_12","alias_value":"V6VVWLWXPT5T","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"pith_short_16","alias_value":"V6VVWLWXPT5TU5CJ","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"pith_short_8","alias_value":"V6VVWLWX","created_at":"2026-07-05T01:46:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2020:V6VVWLWXPT5TU5CJCDKBJIQFWA","target":"record","payload":{"canonical_record":{"source":{"id":"2003.00534","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-03-01T17:47:03Z","cross_cats_sorted":["math.OC","stat.ML"],"title_canon_sha256":"36c9549200676df8d0fcb355fe445fc370b7d9d189783374e126f9ae5224ff4c","abstract_canon_sha256":"91345aa064e20c5d7e0be912490430175cd3a0c2861729ad5185cc2431fd6cb6"},"schema_version":"1.0"},"canonical_sha256":"afab5b2ed77cfb3a744910d414a205b037e971fc50ab17042ccbbb5a2cda0148","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T01:46:00.479036Z","signature_b64":"lyfEIVZHVPfG58sFiC+bYXJUgqzsC+a4RZjTBWTpEsZabbiZIWENqbzDdY6wUJkoxzO/7MJlA/V8uAdBfZR8Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"afab5b2ed77cfb3a744910d414a205b037e971fc50ab17042ccbbb5a2cda0148","last_reissued_at":"2026-07-05T01:46:00.478550Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T01:46:00.478550Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2003.00534","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T01:46:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A8m8GgMKVPgZPwTr+dJ+jfWXBQ8bbpe06IKYyyIiEeaxqGBNyESZWLxGeb4yJSiiQsICdj3lv3cIUl1oSc1qDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:07:08.329472Z"},"content_sha256":"de6b10a4667acab9290ff1b1b9a12d513b5e42d8462fd0a6ad540eb58c159481","schema_version":"1.0","event_id":"sha256:de6b10a4667acab9290ff1b1b9a12d513b5e42d8462fd0a6ad540eb58c159481"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2020:V6VVWLWXPT5TU5CJCDKBJIQFWA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Provably Efficient Safe Exploration via Primal-Dual Policy Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Dongsheng Ding, Mihailo R. Jovanovi\\'c, Xiaohan Wei, Zhaoran Wang, Zhuoran Yang","submitted_at":"2020-03-01T17:47:03Z","abstract_excerpt":"We study the Safe Reinforcement Learning (SRL) problem using the Constrained Markov Decision Process (CMDP) formulation in which an agent aims to maximize the expected total reward subject to a safety constraint on the expected total value of a utility function. We focus on an episodic setting with the function approximation where the Markov transition kernels have a linear structure but do not impose any additional assumptions on the sampling model. Designing SRL algorithms with provable computational and statistical efficiency is particularly challenging under this setting because of the nee"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2003.00534","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2003.00534/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T01:46:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gj1A+SGeYAtpeiqymQO4d4I1K+xdS4/seP3Jmr9A9MzSPpnZj4LtyK22w0/S2hKON581/Oo3b9StV9ZKwcRrDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:07:08.329857Z"},"content_sha256":"a322ce4e67a78c1eb56efe419339f24e7a2d2fb5efb0d8d283cd2c9269fc600b","schema_version":"1.0","event_id":"sha256:a322ce4e67a78c1eb56efe419339f24e7a2d2fb5efb0d8d283cd2c9269fc600b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA/bundle.json","state_url":"https://pith.science/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T09:07:08Z","links":{"resolver":"https://pith.science/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA","bundle":"https://pith.science/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA/bundle.json","state":"https://pith.science/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/V6VVWLWXPT5TU5CJCDKBJIQFWA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:V6VVWLWXPT5TU5CJCDKBJIQFWA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"91345aa064e20c5d7e0be912490430175cd3a0c2861729ad5185cc2431fd6cb6","cross_cats_sorted":["math.OC","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-03-01T17:47:03Z","title_canon_sha256":"36c9549200676df8d0fcb355fe445fc370b7d9d189783374e126f9ae5224ff4c"},"schema_version":"1.0","source":{"id":"2003.00534","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2003.00534","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"arxiv_version","alias_value":"2003.00534v2","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2003.00534","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"pith_short_12","alias_value":"V6VVWLWXPT5T","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"pith_short_16","alias_value":"V6VVWLWXPT5TU5CJ","created_at":"2026-07-05T01:46:00Z"},{"alias_kind":"pith_short_8","alias_value":"V6VVWLWX","created_at":"2026-07-05T01:46:00Z"}],"graph_snapshots":[{"event_id":"sha256:a322ce4e67a78c1eb56efe419339f24e7a2d2fb5efb0d8d283cd2c9269fc600b","target":"graph","created_at":"2026-07-05T01:46:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2003.00534/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study the Safe Reinforcement Learning (SRL) problem using the Constrained Markov Decision Process (CMDP) formulation in which an agent aims to maximize the expected total reward subject to a safety constraint on the expected total value of a utility function. We focus on an episodic setting with the function approximation where the Markov transition kernels have a linear structure but do not impose any additional assumptions on the sampling model. Designing SRL algorithms with provable computational and statistical efficiency is particularly challenging under this setting because of the nee","authors_text":"Dongsheng Ding, Mihailo R. Jovanovi\\'c, Xiaohan Wei, Zhaoran Wang, Zhuoran Yang","cross_cats":["math.OC","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-03-01T17:47:03Z","title":"Provably Efficient Safe Exploration via Primal-Dual Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2003.00534","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:de6b10a4667acab9290ff1b1b9a12d513b5e42d8462fd0a6ad540eb58c159481","target":"record","created_at":"2026-07-05T01:46:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"91345aa064e20c5d7e0be912490430175cd3a0c2861729ad5185cc2431fd6cb6","cross_cats_sorted":["math.OC","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-03-01T17:47:03Z","title_canon_sha256":"36c9549200676df8d0fcb355fe445fc370b7d9d189783374e126f9ae5224ff4c"},"schema_version":"1.0","source":{"id":"2003.00534","kind":"arxiv","version":2}},"canonical_sha256":"afab5b2ed77cfb3a744910d414a205b037e971fc50ab17042ccbbb5a2cda0148","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"afab5b2ed77cfb3a744910d414a205b037e971fc50ab17042ccbbb5a2cda0148","first_computed_at":"2026-07-05T01:46:00.478550Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T01:46:00.478550Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lyfEIVZHVPfG58sFiC+bYXJUgqzsC+a4RZjTBWTpEsZabbiZIWENqbzDdY6wUJkoxzO/7MJlA/V8uAdBfZR8Dw==","signature_status":"signed_v1","signed_at":"2026-07-05T01:46:00.479036Z","signed_message":"canonical_sha256_bytes"},"source_id":"2003.00534","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:de6b10a4667acab9290ff1b1b9a12d513b5e42d8462fd0a6ad540eb58c159481","sha256:a322ce4e67a78c1eb56efe419339f24e7a2d2fb5efb0d8d283cd2c9269fc600b"],"state_sha256":"cb27733d7fffd83399728a2bce9f0da62c651d0a768bf9e582c751eefcbc618e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PCuWGv+C820y9mnaDiZLhb7vjXels1zcBM3yJZf2CIdHcFeEmV3r9j/ndqPXYCeb9F1GT57kGHUrztN7PH0XCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T09:07:08.332163Z","bundle_sha256":"ea276be5d9263cde4e7e19b3773c5fb773814b0517b532aef8e15f522b42ce79"}}