{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:HFFH5ABCU5YW5JHNZWFTT2OUTP","short_pith_number":"pith:HFFH5ABC","schema_version":"1.0","canonical_sha256":"394a7e8022a7716ea4edcd8b39e9d49bddd1a0bc670305c66be4c2639c48f4c4","source":{"kind":"arxiv","id":"2510.21583","version":2},"attestation_state":"computed","paper":{"title":"Principled RL for Flow Matching Emerges from the Chunk-level Policy Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Bo Li, Bo Xia, Changqian Yu, Haoyuan Sun, Keyu Fan, Kun Gai, Penghui Du, Sinan Du, Tiantian Zhang, Xinhao Hu, Xueqian Wang, Xu Wan, Yifu Luo, Yongzhe Chang, Zhiyu Chen","submitted_at":"2025-10-24T15:50:36Z","abstract_excerpt":"Recent Progress in post-training flow matching for text-to-image (T2I) generation with Group Relative Policy Optimization (GRPO) has demonstrated strong potential. However, it is hindered by a critical limitation: inaccurate advantage attribution. In this work, we argue that aggregating consecutive steps into a coherent `chunk' and shifting the policy optimization paradigm from GRPO's step level to the chunk level can effectively mitigate the negative impact of this issue. Building on this insight, we propose Group Chunking Policy Optimization (GCPO), the first chunk-level reinforcement learni"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.21583","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-24T15:50:36Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"97fabe0dc4a4a97d19dee2633c2d99f64f923900684515f64590df07b82ca45f","abstract_canon_sha256":"ce09295ac040d12ac6363ea91b00294a5b810f26604d6fe2fc9b1f1b3e1d2e04"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:11.456538Z","signature_b64":"TN4YwVcxYSn1qjazkiob6M/cX5bmHZAB1yBFLz+tcnS+E3aKo5fI45e0stC00RA4VktVFKOglTjV/CqnebkaDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"394a7e8022a7716ea4edcd8b39e9d49bddd1a0bc670305c66be4c2639c48f4c4","last_reissued_at":"2026-05-21T01:05:11.455462Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:11.455462Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Principled RL for Flow Matching Emerges from the Chunk-level Policy Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Bo Li, Bo Xia, Changqian Yu, Haoyuan Sun, Keyu Fan, Kun Gai, Penghui Du, Sinan Du, Tiantian Zhang, Xinhao Hu, Xueqian Wang, Xu Wan, Yifu Luo, Yongzhe Chang, Zhiyu Chen","submitted_at":"2025-10-24T15:50:36Z","abstract_excerpt":"Recent Progress in post-training flow matching for text-to-image (T2I) generation with Group Relative Policy Optimization (GRPO) has demonstrated strong potential. However, it is hindered by a critical limitation: inaccurate advantage attribution. In this work, we argue that aggregating consecutive steps into a coherent `chunk' and shifting the policy optimization paradigm from GRPO's step level to the chunk level can effectively mitigate the negative impact of this issue. Building on this insight, we propose Group Chunking Policy Optimization (GCPO), the first chunk-level reinforcement learni"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.21583","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.21583/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.21583","created_at":"2026-05-21T01:05:11.455572+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.21583v2","created_at":"2026-05-21T01:05:11.455572+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.21583","created_at":"2026-05-21T01:05:11.455572+00:00"},{"alias_kind":"pith_short_12","alias_value":"HFFH5ABCU5YW","created_at":"2026-05-21T01:05:11.455572+00:00"},{"alias_kind":"pith_short_16","alias_value":"HFFH5ABCU5YW5JHN","created_at":"2026-05-21T01:05:11.455572+00:00"},{"alias_kind":"pith_short_8","alias_value":"HFFH5ABC","created_at":"2026-05-21T01:05:11.455572+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.10937","citing_title":"Power Reinforcement Post-Training of Text-to-Image Models with Super-Linear Advantage Shaping","ref_index":101,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19406","citing_title":"HP-Edit: A Human-Preference Post-Training Framework for Image Editing","ref_index":30,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP","json":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP.json","graph_json":"https://pith.science/api/pith-number/HFFH5ABCU5YW5JHNZWFTT2OUTP/graph.json","events_json":"https://pith.science/api/pith-number/HFFH5ABCU5YW5JHNZWFTT2OUTP/events.json","paper":"https://pith.science/paper/HFFH5ABC"},"agent_actions":{"view_html":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP","download_json":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP.json","view_paper":"https://pith.science/paper/HFFH5ABC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.21583&json=true","fetch_graph":"https://pith.science/api/pith-number/HFFH5ABCU5YW5JHNZWFTT2OUTP/graph.json","fetch_events":"https://pith.science/api/pith-number/HFFH5ABCU5YW5JHNZWFTT2OUTP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP/action/storage_attestation","attest_author":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP/action/author_attestation","sign_citation":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP/action/citation_signature","submit_replication":"https://pith.science/pith/HFFH5ABCU5YW5JHNZWFTT2OUTP/action/replication_record"}},"created_at":"2026-05-21T01:05:11.455572+00:00","updated_at":"2026-05-21T01:05:11.455572+00:00"}