{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:Y7SNPJCSDKKHOKDPAJHASCWZOX","short_pith_number":"pith:Y7SNPJCS","schema_version":"1.0","canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","source":{"kind":"arxiv","id":"2603.10250","version":2},"attestation_state":"computed","paper":{"title":"GeMPO: Generalized Measure Matching for Online Diffusion Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bo Dai, Chenxiao Gao, Haitong Ma, Na Li, Tianyi Chen","submitted_at":"2026-03-10T22:01:13Z","abstract_excerpt":"A commonly used family of RL algorithms for diffusion policies conducts softmax reweighting over samples from the behavior policy, which often induces an overgreedy policy and fails to utilize feedback from negative samples. In this work, we introduce GeMPO, a simple and unified framework that generalizes reweighting scheme in diffusion RL from softmax to general monotonic functions. GeMPO revisits diffusion RL via a measure matching perspective: First, we construct a virtual target policy measure via solving a regularized policy optimization objective; Second, we minimize the divergence betwe"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.10250","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-10T22:01:13Z","cross_cats_sorted":[],"title_canon_sha256":"2b3039d1780865d412693825786f09ea0665c7e3eb5209354e159773a389b2b4","abstract_canon_sha256":"e95ee327606cad4cfc53964bf2a8605cfbb579c1a3bd95f7a9768cb1da57c13e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:27.567306Z","signature_b64":"hMG2ggi6ypkQtUtX9nvAeV9MJWrM2XXB7SIvrk4LX5qU3jW+6zXTBTTiMZ5DVauTY6OuzIhDS6WAEg/N8ColCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7e4d7a4521a9477286f024e090ad975c4e795b5de15ce8fbe77350ca38f7e78","last_reissued_at":"2026-05-26T01:03:27.566310Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:27.566310Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GeMPO: Generalized Measure Matching for Online Diffusion Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bo Dai, Chenxiao Gao, Haitong Ma, Na Li, Tianyi Chen","submitted_at":"2026-03-10T22:01:13Z","abstract_excerpt":"A commonly used family of RL algorithms for diffusion policies conducts softmax reweighting over samples from the behavior policy, which often induces an overgreedy policy and fails to utilize feedback from negative samples. In this work, we introduce GeMPO, a simple and unified framework that generalizes reweighting scheme in diffusion RL from softmax to general monotonic functions. GeMPO revisits diffusion RL via a measure matching perspective: First, we construct a virtual target policy measure via solving a regularized policy optimization objective; Second, we minimize the divergence betwe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.10250","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.10250/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.10250","created_at":"2026-05-26T01:03:27.566467+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.10250v2","created_at":"2026-05-26T01:03:27.566467+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.10250","created_at":"2026-05-26T01:03:27.566467+00:00"},{"alias_kind":"pith_short_12","alias_value":"Y7SNPJCSDKKH","created_at":"2026-05-26T01:03:27.566467+00:00"},{"alias_kind":"pith_short_16","alias_value":"Y7SNPJCSDKKHOKDP","created_at":"2026-05-26T01:03:27.566467+00:00"},{"alias_kind":"pith_short_8","alias_value":"Y7SNPJCS","created_at":"2026-05-26T01:03:27.566467+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX","json":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX.json","graph_json":"https://pith.science/api/pith-number/Y7SNPJCSDKKHOKDPAJHASCWZOX/graph.json","events_json":"https://pith.science/api/pith-number/Y7SNPJCSDKKHOKDPAJHASCWZOX/events.json","paper":"https://pith.science/paper/Y7SNPJCS"},"agent_actions":{"view_html":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX","download_json":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX.json","view_paper":"https://pith.science/paper/Y7SNPJCS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.10250&json=true","fetch_graph":"https://pith.science/api/pith-number/Y7SNPJCSDKKHOKDPAJHASCWZOX/graph.json","fetch_events":"https://pith.science/api/pith-number/Y7SNPJCSDKKHOKDPAJHASCWZOX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/action/storage_attestation","attest_author":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/action/author_attestation","sign_citation":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/action/citation_signature","submit_replication":"https://pith.science/pith/Y7SNPJCSDKKHOKDPAJHASCWZOX/action/replication_record"}},"created_at":"2026-05-26T01:03:27.566467+00:00","updated_at":"2026-05-26T01:03:27.566467+00:00"}