{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PXNMNZNLBPFVEZV2MFQ4PTNRN6","short_pith_number":"pith:PXNMNZNL","canonical_record":{"source":{"id":"2605.18320","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:39:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a293fa64e9047ad27cf300a18c54804aca47cac53649cb4571b833d0862fb1fc","abstract_canon_sha256":"f6d51e7a89a6986cd8a310ce475eaf33ded41a27590051e4db090317d97ed3f3"},"schema_version":"1.0"},"canonical_sha256":"7ddac6e5ab0bcb5266ba6161c7cdb16f9073e8b7785ac77a7ce85860ddaa65eb","source":{"kind":"arxiv","id":"2605.18320","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18320","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18320v1","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18320","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"pith_short_12","alias_value":"PXNMNZNLBPFV","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"pith_short_16","alias_value":"PXNMNZNLBPFVEZV2","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"pith_short_8","alias_value":"PXNMNZNL","created_at":"2026-05-20T00:05:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PXNMNZNLBPFVEZV2MFQ4PTNRN6","target":"record","payload":{"canonical_record":{"source":{"id":"2605.18320","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:39:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a293fa64e9047ad27cf300a18c54804aca47cac53649cb4571b833d0862fb1fc","abstract_canon_sha256":"f6d51e7a89a6986cd8a310ce475eaf33ded41a27590051e4db090317d97ed3f3"},"schema_version":"1.0"},"canonical_sha256":"7ddac6e5ab0bcb5266ba6161c7cdb16f9073e8b7785ac77a7ce85860ddaa65eb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:05:55.087847Z","signature_b64":"DEBO/BMlJJdWl6thLU8tOW5fOUNMQU4EyFasNj0HpLkZLnMeA3G3A4POGlrA4mpb/drIF8ft/eJ8puXisiBECg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7ddac6e5ab0bcb5266ba6161c7cdb16f9073e8b7785ac77a7ce85860ddaa65eb","last_reissued_at":"2026-05-20T00:05:55.087156Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:05:55.087156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.18320","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:05:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zDGaVjeQJu4+JwDkUIRaBGMtPAoqK71OBFQZg5lfKcbQL8FGq9zuoZhGqlqsbqYwxFYqeWjqvXAvmjymOCYkCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T05:26:00.177529Z"},"content_sha256":"73c89a45ea519297233f4fe017e8550e23a47cb1a2442e09e9c5c7fce9dc3f46","schema_version":"1.0","event_id":"sha256:73c89a45ea519297233f4fe017e8550e23a47cb1a2442e09e9c5c7fce9dc3f46"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PXNMNZNLBPFVEZV2MFQ4PTNRN6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ISEP: Implicit Support Expansion for Offline Reinforcement Learning via Stochastic Policy Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Shaoqin Zhu, Xiaoqiang Ji, Yifei Chen","submitted_at":"2026-05-18T12:39:30Z","abstract_excerpt":"Offline reinforcement learning methods typically enforce strict constraints to ensure safety; yet this rigidity often prevents the discovery of optimal behaviors outside the immediate support of the behavior policy. To address this, we propose Implicit Support Expansion via stochastic Policy optimization (ISEP), which leverages a value function interpolated between in-distribution data and policy samples to implicitly expand the feasible action support. This mechanism \"densifies\" high-reward regions, creating a navigable path for policy improvement while theoretically guaranteeing bounded valu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18320","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18320/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.188939Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T23:21:58.864114Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"dabd1dae6495417e1375873a01770455ddde06ffae88d77348b323500900d006"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:05:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vB+N2AhIMHcbmSUn1XX1IGmGekAyoW2ACFWwoiV4xKZXNwr80UzeTCxqX3eVLEPBATXTG3x0nW7JDHsbTVUJBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T05:26:00.178370Z"},"content_sha256":"055c19982d4047e6a73ac1a5f0e32c45ba6871f01e738a846be113e6f995ea7e","schema_version":"1.0","event_id":"sha256:055c19982d4047e6a73ac1a5f0e32c45ba6871f01e738a846be113e6f995ea7e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6/bundle.json","state_url":"https://pith.science/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T05:26:00Z","links":{"resolver":"https://pith.science/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6","bundle":"https://pith.science/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6/bundle.json","state":"https://pith.science/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PXNMNZNLBPFVEZV2MFQ4PTNRN6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PXNMNZNLBPFVEZV2MFQ4PTNRN6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f6d51e7a89a6986cd8a310ce475eaf33ded41a27590051e4db090317d97ed3f3","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:39:30Z","title_canon_sha256":"a293fa64e9047ad27cf300a18c54804aca47cac53649cb4571b833d0862fb1fc"},"schema_version":"1.0","source":{"id":"2605.18320","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18320","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18320v1","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18320","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"pith_short_12","alias_value":"PXNMNZNLBPFV","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"pith_short_16","alias_value":"PXNMNZNLBPFVEZV2","created_at":"2026-05-20T00:05:55Z"},{"alias_kind":"pith_short_8","alias_value":"PXNMNZNL","created_at":"2026-05-20T00:05:55Z"}],"graph_snapshots":[{"event_id":"sha256:055c19982d4047e6a73ac1a5f0e32c45ba6871f01e738a846be113e6f995ea7e","target":"graph","created_at":"2026-05-20T00:05:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.188939Z","status":"skipped","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T23:21:58.864114Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.18320/integrity.json","findings":[],"snapshot_sha256":"dabd1dae6495417e1375873a01770455ddde06ffae88d77348b323500900d006","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Offline reinforcement learning methods typically enforce strict constraints to ensure safety; yet this rigidity often prevents the discovery of optimal behaviors outside the immediate support of the behavior policy. To address this, we propose Implicit Support Expansion via stochastic Policy optimization (ISEP), which leverages a value function interpolated between in-distribution data and policy samples to implicitly expand the feasible action support. This mechanism \"densifies\" high-reward regions, creating a navigable path for policy improvement while theoretically guaranteeing bounded valu","authors_text":"Shaoqin Zhu, Xiaoqiang Ji, Yifei Chen","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:39:30Z","title":"ISEP: Implicit Support Expansion for Offline Reinforcement Learning via Stochastic Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18320","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:73c89a45ea519297233f4fe017e8550e23a47cb1a2442e09e9c5c7fce9dc3f46","target":"record","created_at":"2026-05-20T00:05:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f6d51e7a89a6986cd8a310ce475eaf33ded41a27590051e4db090317d97ed3f3","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:39:30Z","title_canon_sha256":"a293fa64e9047ad27cf300a18c54804aca47cac53649cb4571b833d0862fb1fc"},"schema_version":"1.0","source":{"id":"2605.18320","kind":"arxiv","version":1}},"canonical_sha256":"7ddac6e5ab0bcb5266ba6161c7cdb16f9073e8b7785ac77a7ce85860ddaa65eb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7ddac6e5ab0bcb5266ba6161c7cdb16f9073e8b7785ac77a7ce85860ddaa65eb","first_computed_at":"2026-05-20T00:05:55.087156Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:05:55.087156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DEBO/BMlJJdWl6thLU8tOW5fOUNMQU4EyFasNj0HpLkZLnMeA3G3A4POGlrA4mpb/drIF8ft/eJ8puXisiBECg==","signature_status":"signed_v1","signed_at":"2026-05-20T00:05:55.087847Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.18320","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:73c89a45ea519297233f4fe017e8550e23a47cb1a2442e09e9c5c7fce9dc3f46","sha256:055c19982d4047e6a73ac1a5f0e32c45ba6871f01e738a846be113e6f995ea7e"],"state_sha256":"cf9e2b1adc638a9367eff09c88434a803972397cea8d8604d0dd9fc0ea4590ab"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o2PnBpBG6xHlQATHfiOfXMcCBVcmNoybR7Ut2fc7+F4atsD7GGSgJyUVw+r0hRQJq2Dt59ifz98SSmYR6Pd5BQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T05:26:00.183011Z","bundle_sha256":"541fc2f804ca5214be79e7aafd52bf19178bb1de56d420799ea9bd0abf7f788e"}}