{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:Y7SNPJCSDKKHOKDPAJHASCWZOX","short_pith_number":"pith:Y7SNPJCS","canonical_record":{"source":{"id":"2603.10250","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-10T22:01:13Z","cross_cats_sorted":[],"title_canon_sha256":"2b3039d1780865d412693825786f09ea0665c7e3eb5209354e159773a389b2b4","abstract_canon_sha256":"e95ee327606cad4cfc53964bf2a8605cfbb579c1a3bd95f7a9768cb1da57c13e"},"schema_version":"1.0"},"canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","source":{"kind":"arxiv","id":"2603.10250","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.10250","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"arxiv_version","alias_value":"2603.10250v2","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.10250","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_12","alias_value":"Y7SNPJCSDKKH","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_16","alias_value":"Y7SNPJCSDKKHOKDP","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_8","alias_value":"Y7SNPJCS","created_at":"2026-05-26T01:03:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:Y7SNPJCSDKKHOKDPAJHASCWZOX","target":"record","payload":{"canonical_record":{"source":{"id":"2603.10250","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-10T22:01:13Z","cross_cats_sorted":[],"title_canon_sha256":"2b3039d1780865d412693825786f09ea0665c7e3eb5209354e159773a389b2b4","abstract_canon_sha256":"e95ee327606cad4cfc53964bf2a8605cfbb579c1a3bd95f7a9768cb1da57c13e"},"schema_version":"1.0"},"canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:27.567306Z","signature_b64":"hMG2ggi6ypkQtUtX9nvAeV9MJWrM2XXB7SIvrk4LX5qU3jW+6zXTBTTiMZ5DVauTY6OuzIhDS6WAEg/N8ColCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","last_reissued_at":"2026-05-26T01:03:27.566310Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:27.566310Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.10250","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FSBWxYBejDRMhoig3719qD+Y67V05i8JTYnzCUfRHrarjZP9YtO6lkIlHFzienpwp3NIecFRR6PJukIYvp7KAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T23:04:59.635078Z"},"content_sha256":"3dc8b4c65d1604184d4ec1a2efa5138f050e2341ce6a3d2cd01df1294ce52fa7","schema_version":"1.0","event_id":"sha256:3dc8b4c65d1604184d4ec1a2efa5138f050e2341ce6a3d2cd01df1294ce52fa7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:Y7SNPJCSDKKHOKDPAJHASCWZOX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"GeMPO: Generalized Measure Matching for Online Diffusion Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bo Dai, Chenxiao Gao, Haitong Ma, Na Li, Tianyi Chen","submitted_at":"2026-03-10T22:01:13Z","abstract_excerpt":"A commonly used family of RL algorithms for diffusion policies conducts softmax reweighting over samples from the behavior policy, which often induces an overgreedy policy and fails to utilize feedback from negative samples. In this work, we introduce GeMPO, a simple and unified framework that generalizes reweighting scheme in diffusion RL from softmax to general monotonic functions. GeMPO revisits diffusion RL via a measure matching perspective: First, we construct a virtual target policy measure via solving a regularized policy optimization objective; Second, we minimize the divergence betwe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.10250","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.10250/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zkEF2/Sg3Q7R4WWeBi6/lAm6fJJG6lkGHYTjhLTXFEx3QOojnwSrH219iJiqYkUMIiASNQP9Ica1ZYLlJvVgDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T23:04:59.635461Z"},"content_sha256":"87ce7e182b4ac16c003cc83e6670ecb6c68dd22a954bac7656855f9afbb10a26","schema_version":"1.0","event_id":"sha256:87ce7e182b4ac16c003cc83e6670ecb6c68dd22a954bac7656855f9afbb10a26"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/bundle.json","state_url":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T23:04:59Z","links":{"resolver":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX","bundle":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/bundle.json","state":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:Y7SNPJCSDKKHOKDPAJHASCWZOX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e95ee327606cad4cfc53964bf2a8605cfbb579c1a3bd95f7a9768cb1da57c13e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-10T22:01:13Z","title_canon_sha256":"2b3039d1780865d412693825786f09ea0665c7e3eb5209354e159773a389b2b4"},"schema_version":"1.0","source":{"id":"2603.10250","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.10250","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"arxiv_version","alias_value":"2603.10250v2","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.10250","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_12","alias_value":"Y7SNPJCSDKKH","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_16","alias_value":"Y7SNPJCSDKKHOKDP","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_8","alias_value":"Y7SNPJCS","created_at":"2026-05-26T01:03:27Z"}],"graph_snapshots":[{"event_id":"sha256:87ce7e182b4ac16c003cc83e6670ecb6c68dd22a954bac7656855f9afbb10a26","target":"graph","created_at":"2026-05-26T01:03:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.10250/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"A commonly used family of RL algorithms for diffusion policies conducts softmax reweighting over samples from the behavior policy, which often induces an overgreedy policy and fails to utilize feedback from negative samples. In this work, we introduce GeMPO, a simple and unified framework that generalizes reweighting scheme in diffusion RL from softmax to general monotonic functions. GeMPO revisits diffusion RL via a measure matching perspective: First, we construct a virtual target policy measure via solving a regularized policy optimization objective; Second, we minimize the divergence betwe","authors_text":"Bo Dai, Chenxiao Gao, Haitong Ma, Na Li, Tianyi Chen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-10T22:01:13Z","title":"GeMPO: Generalized Measure Matching for Online Diffusion Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.10250","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3dc8b4c65d1604184d4ec1a2efa5138f050e2341ce6a3d2cd01df1294ce52fa7","target":"record","created_at":"2026-05-26T01:03:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e95ee327606cad4cfc53964bf2a8605cfbb579c1a3bd95f7a9768cb1da57c13e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-10T22:01:13Z","title_canon_sha256":"2b3039d1780865d412693825786f09ea0665c7e3eb5209354e159773a389b2b4"},"schema_version":"1.0","source":{"id":"2603.10250","kind":"arxiv","version":2}},"canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","first_computed_at":"2026-05-26T01:03:27.566310Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:27.566310Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hMG2ggi6ypkQtUtX9nvAeV9MJWrM2XXB7SIvrk4LX5qU3jW+6zXTBTTiMZ5DVauTY6OuzIhDS6WAEg/N8ColCQ==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:27.567306Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.10250","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3dc8b4c65d1604184d4ec1a2efa5138f050e2341ce6a3d2cd01df1294ce52fa7","sha256:87ce7e182b4ac16c003cc83e6670ecb6c68dd22a954bac7656855f9afbb10a26"],"state_sha256":"5116e10078bea149deb40fb09006956bb7825733ddeaef8653767ac86a1c30c1"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zNGAnVC8QHzXwh0gbjXQfcqM6aaSAIx4+pZvgNhSBrhETFXXqojjEDORAUiE6407wLWt7ehhF595Ps4QecgoAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T23:04:59.637505Z","bundle_sha256":"eff096f6e3844d1a864c9d5138082fe75446ecc9a5a3ff8d150646b2e91292a0"}}