{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:5BBJ4P63GSLCCUD5L4VVCSOADW","short_pith_number":"pith:5BBJ4P63","canonical_record":{"source":{"id":"2603.21621","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-23T06:37:42Z","cross_cats_sorted":[],"title_canon_sha256":"ff4414db2a9b0cf170152ea11cf8cf5a084cf4c80d2a84e4813543697ba4b16d","abstract_canon_sha256":"8ca86fa594eab58155b1144a70ad5df7f68f4a3370f7fa08af1845a564467dc9"},"schema_version":"1.0"},"canonical_sha256":"e8429e3fdb349621507d5f2b5149c01d844aa6bf3ec89a215f9e4a750811624e","source":{"kind":"arxiv","id":"2603.21621","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.21621","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"arxiv_version","alias_value":"2603.21621v2","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.21621","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"pith_short_12","alias_value":"5BBJ4P63GSLC","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"pith_short_16","alias_value":"5BBJ4P63GSLCCUD5","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"pith_short_8","alias_value":"5BBJ4P63","created_at":"2026-05-29T01:05:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:5BBJ4P63GSLCCUD5L4VVCSOADW","target":"record","payload":{"canonical_record":{"source":{"id":"2603.21621","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-23T06:37:42Z","cross_cats_sorted":[],"title_canon_sha256":"ff4414db2a9b0cf170152ea11cf8cf5a084cf4c80d2a84e4813543697ba4b16d","abstract_canon_sha256":"8ca86fa594eab58155b1144a70ad5df7f68f4a3370f7fa08af1845a564467dc9"},"schema_version":"1.0"},"canonical_sha256":"e8429e3fdb349621507d5f2b5149c01d844aa6bf3ec89a215f9e4a750811624e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:07.744738Z","signature_b64":"vkL12jO529u2I8sWhWSblHPa8sv4+6BcGbb/DEEF+RkIBVubXg+KOM844ggSz+UdJMh20I+ENqZupSyVcd2HAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e8429e3fdb349621507d5f2b5149c01d844aa6bf3ec89a215f9e4a750811624e","last_reissued_at":"2026-05-29T01:05:07.744094Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:07.744094Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.21621","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:05:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lgrqlP9lyFE85NOhzlosv3j1qeGCZaBSIbwx8xTnio0dpEwZ0OT7N9pkK869CSAw78gd/Neu+VL5htHNLO50DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T05:28:31.808355Z"},"content_sha256":"b806c6094c1313ec44868a5da0fdccbb4e80fc3d5313da44065e03fad78bc179","schema_version":"1.0","event_id":"sha256:b806c6094c1313ec44868a5da0fdccbb4e80fc3d5313da44065e03fad78bc179"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:5BBJ4P63GSLCCUD5L4VVCSOADW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Path-Space Mirror Descent for On-Policy Reinforcement Learning under the Generalized Schr\\\"odinger Bridge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Qingyuan Zhou, Shutong Ding, Yanwei Fu, Yuehu Gong, Yulin Chen, Zeyuan Wang","submitted_at":"2026-03-23T06:37:42Z","abstract_excerpt":"Classical on-policy algorithms such as PPO and mirror descent policy optimization provide stable proximal policy updates through tractable action likelihoods, but are typically instantiated with simple Gaussian policies whose expressiveness can be limited in complex continuous-control tasks. Generative policies based on diffusion and flow models provide more expressive action distributions, but they naturally define distributions over multi-step denoising paths whose terminal action density is often intractable, creating a mismatch with likelihood-based on-policy proximal updates. To address t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.21621","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.21621/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:05:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PIb9a5p4f8ZmFIShLiLgQK3BQEVliWd6VBUileLQej0muFoqIm04IZYgM/uQJLs0k5FBGu7q15c2yjPTrcH6AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T05:28:31.808738Z"},"content_sha256":"6504ac22201bed74d35718880a84b9558bd7bca314f6f46f4b3c539f94da1911","schema_version":"1.0","event_id":"sha256:6504ac22201bed74d35718880a84b9558bd7bca314f6f46f4b3c539f94da1911"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5BBJ4P63GSLCCUD5L4VVCSOADW/bundle.json","state_url":"https://pith.science/pith/5BBJ4P63GSLCCUD5L4VVCSOADW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5BBJ4P63GSLCCUD5L4VVCSOADW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T05:28:31Z","links":{"resolver":"https://pith.science/pith/5BBJ4P63GSLCCUD5L4VVCSOADW","bundle":"https://pith.science/pith/5BBJ4P63GSLCCUD5L4VVCSOADW/bundle.json","state":"https://pith.science/pith/5BBJ4P63GSLCCUD5L4VVCSOADW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5BBJ4P63GSLCCUD5L4VVCSOADW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:5BBJ4P63GSLCCUD5L4VVCSOADW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8ca86fa594eab58155b1144a70ad5df7f68f4a3370f7fa08af1845a564467dc9","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-23T06:37:42Z","title_canon_sha256":"ff4414db2a9b0cf170152ea11cf8cf5a084cf4c80d2a84e4813543697ba4b16d"},"schema_version":"1.0","source":{"id":"2603.21621","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.21621","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"arxiv_version","alias_value":"2603.21621v2","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.21621","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"pith_short_12","alias_value":"5BBJ4P63GSLC","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"pith_short_16","alias_value":"5BBJ4P63GSLCCUD5","created_at":"2026-05-29T01:05:07Z"},{"alias_kind":"pith_short_8","alias_value":"5BBJ4P63","created_at":"2026-05-29T01:05:07Z"}],"graph_snapshots":[{"event_id":"sha256:6504ac22201bed74d35718880a84b9558bd7bca314f6f46f4b3c539f94da1911","target":"graph","created_at":"2026-05-29T01:05:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.21621/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Classical on-policy algorithms such as PPO and mirror descent policy optimization provide stable proximal policy updates through tractable action likelihoods, but are typically instantiated with simple Gaussian policies whose expressiveness can be limited in complex continuous-control tasks. Generative policies based on diffusion and flow models provide more expressive action distributions, but they naturally define distributions over multi-step denoising paths whose terminal action density is often intractable, creating a mismatch with likelihood-based on-policy proximal updates. To address t","authors_text":"Qingyuan Zhou, Shutong Ding, Yanwei Fu, Yuehu Gong, Yulin Chen, Zeyuan Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-23T06:37:42Z","title":"Path-Space Mirror Descent for On-Policy Reinforcement Learning under the Generalized Schr\\\"odinger Bridge"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.21621","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b806c6094c1313ec44868a5da0fdccbb4e80fc3d5313da44065e03fad78bc179","target":"record","created_at":"2026-05-29T01:05:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8ca86fa594eab58155b1144a70ad5df7f68f4a3370f7fa08af1845a564467dc9","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-23T06:37:42Z","title_canon_sha256":"ff4414db2a9b0cf170152ea11cf8cf5a084cf4c80d2a84e4813543697ba4b16d"},"schema_version":"1.0","source":{"id":"2603.21621","kind":"arxiv","version":2}},"canonical_sha256":"e8429e3fdb349621507d5f2b5149c01d844aa6bf3ec89a215f9e4a750811624e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e8429e3fdb349621507d5f2b5149c01d844aa6bf3ec89a215f9e4a750811624e","first_computed_at":"2026-05-29T01:05:07.744094Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:05:07.744094Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"vkL12jO529u2I8sWhWSblHPa8sv4+6BcGbb/DEEF+RkIBVubXg+KOM844ggSz+UdJMh20I+ENqZupSyVcd2HAA==","signature_status":"signed_v1","signed_at":"2026-05-29T01:05:07.744738Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.21621","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b806c6094c1313ec44868a5da0fdccbb4e80fc3d5313da44065e03fad78bc179","sha256:6504ac22201bed74d35718880a84b9558bd7bca314f6f46f4b3c539f94da1911"],"state_sha256":"5a68aaa9f77b30325ebce8d921abc0ebf93970e335641a0877f425bc8ead608d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HHHhJ4bJu4F9Ii8NgQBWyJSW6WnUx/vAqG2jOIC5R+4LLGhwqpyxUroABS373O0W03vDkw/DgRzTEMbKWSUNBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T05:28:31.811568Z","bundle_sha256":"7f48119eb954d67a1124485d5fd0841f9a968d16c2b5db58ba8af0efc68ff42f"}}