{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:KMSLJJOBIGX44SXX67E3LDWGTF","short_pith_number":"pith:KMSLJJOB","canonical_record":{"source":{"id":"2605.08063","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-08T17:50:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"17bb8772104084dc225d4714dee66f4d885f7966e4b1abdb0e02a5936f90f450","abstract_canon_sha256":"461631e79734da5e5af6eb58614015c6fe61285d00aa3e34415eae71ee82b715"},"schema_version":"1.0"},"canonical_sha256":"5324b4a5c141afce4af7f7c9b58ec6994020700309b366174a913882661e3e33","source":{"kind":"arxiv","id":"2605.08063","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.08063","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"arxiv_version","alias_value":"2605.08063v4","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.08063","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_12","alias_value":"KMSLJJOBIGX4","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_16","alias_value":"KMSLJJOBIGX44SXX","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_8","alias_value":"KMSLJJOB","created_at":"2026-05-20T01:05:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:KMSLJJOBIGX44SXX67E3LDWGTF","target":"record","payload":{"canonical_record":{"source":{"id":"2605.08063","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-08T17:50:15Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"17bb8772104084dc225d4714dee66f4d885f7966e4b1abdb0e02a5936f90f450","abstract_canon_sha256":"461631e79734da5e5af6eb58614015c6fe61285d00aa3e34415eae71ee82b715"},"schema_version":"1.0"},"canonical_sha256":"5324b4a5c141afce4af7f7c9b58ec6994020700309b366174a913882661e3e33","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:15.920024Z","signature_b64":"kG0AO0h/0QHeqTiyegrFiM5r/c42xrQxtab62MzedIu9tFp+4XUVv8zSfaYY+J2t22mkK4rrecSIWyserakHDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5324b4a5c141afce4af7f7c9b58ec6994020700309b366174a913882661e3e33","last_reissued_at":"2026-05-20T01:05:15.919408Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:15.919408Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.08063","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"U88als0vmwBkw0PgP6T14boLTBtX8MbhaqtqHECS/nP3g1nBDinUhm96gbnE80uotLejaJk3GHoZFNd/6QpTAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T06:03:28.010772Z"},"content_sha256":"432406c2b396b1e067a1f0d44f18231d32964499e0ed1bdc35d4051386dda318","schema_version":"1.0","event_id":"sha256:432406c2b396b1e067a1f0d44f18231d32964499e0ed1bdc35d4051386dda318"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:KMSLJJOBIGX44SXX67E3LDWGTF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Flow-OPD: On-Policy Distillation for Flow Matching Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Flow-OPD trains domain-specialized teachers with single-reward GRPO then distills them into one flow-matching student using on-policy sampling and dense supervision.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Feng Zhao, Kaituo Feng, Lin Chen, Shaosheng Cao, Shuang Chen, Wenxuan Huang, Yiming Zhao, Yunlong Lin, Yu Zeng, Zehui Chen, Zhen Fang","submitted_at":"2026-05-08T17:50:15Z","abstract_excerpt":"Existing Flow Matching (FM) text-to-image models suffer from two critical bottlenecks under multi-task alignment: the reward sparsity induced by scalar-valued rewards, and the gradient interference arising from jointly optimizing heterogeneous objectives, which together give rise to a 'seesaw effect' of competing metrics and pervasive reward hacking. Inspired by the success of On-Policy Distillation (OPD) in the large language model community, we propose Flow-OPD, the first unified post-training framework that integrates on-policy distillation into Flow Matching models. Flow-OPD adopts a two-s"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Built upon Stable Diffusion 3.5 Medium, Flow-OPD raises the GenEval score from 63 to 92 and the OCR accuracy from 59 to 94, yielding an overall improvement of roughly 10 points over vanilla GRPO, while preserving image fidelity and human-preference alignment and exhibiting an emergent 'teacher-surpassing' effect.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That single-reward GRPO fine-tuning lets each domain-specialized teacher reach its performance ceiling in isolation and that the subsequent three-step orchestration of on-policy sampling, task-routing labeling, and dense supervision can consolidate heterogeneous expertise into one student without reintroducing gradient interference or reward hacking.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Flow-OPD applies on-policy distillation to flow matching models via specialized teachers, cold-start initialization, and manifold anchor regularization, lifting GenEval from 63 to 92 and OCR from 59 to 94 on Stable Diffusion 3.5 Medium.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Flow-OPD trains domain-specialized teachers with single-reward GRPO then distills them into one flow-matching student using on-policy sampling and dense supervision.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"750da906f62c4a06351115d3f5725e784c1d6e549f3a954dbf350a17ac110357"},"source":{"id":"2605.08063","kind":"arxiv","version":4},"verdict":{"id":"3e42d563-e79e-433e-8ae7-e59fd9ac495a","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T05:49:13.205127Z","strongest_claim":"Built upon Stable Diffusion 3.5 Medium, Flow-OPD raises the GenEval score from 63 to 92 and the OCR accuracy from 59 to 94, yielding an overall improvement of roughly 10 points over vanilla GRPO, while preserving image fidelity and human-preference alignment and exhibiting an emergent 'teacher-surpassing' effect.","one_line_summary":"Flow-OPD applies on-policy distillation to flow matching models via specialized teachers, cold-start initialization, and manifold anchor regularization, lifting GenEval from 63 to 92 and OCR from 59 to 94 on Stable Diffusion 3.5 Medium.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That single-reward GRPO fine-tuning lets each domain-specialized teacher reach its performance ceiling in isolation and that the subsequent three-step orchestration of on-policy sampling, task-routing labeling, and dense supervision can consolidate heterogeneous expertise into one student without reintroducing gradient interference or reward hacking.","pith_extraction_headline":"Flow-OPD trains domain-specialized teachers with single-reward GRPO then distills them into one flow-matching student using on-policy sampling and dense supervision."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.08063/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:18.472720Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T11:15:15.189810Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"a3489b45cd4f188ce9baf5a0b1cd4203888d809151de54fdbb8e0dd99a550e32"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"420c214dac7ec8d6b59e6922ecdb2188f6cc71bf50ce69c47933fdebb7e1c803"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"3e42d563-e79e-433e-8ae7-e59fd9ac495a"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KUnU+SPGgoCV4+q/T/aPbcRj6vb0VxsD03g4qe7PtPrY+g7RxVB58oSQp/t/MNxFZyFOOCrUnK7prejnqdj2Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T06:03:28.011652Z"},"content_sha256":"5d78e039e9eaf4bbd6d23368656b246f1ad3825a748b78ac8256c0a265cceece","schema_version":"1.0","event_id":"sha256:5d78e039e9eaf4bbd6d23368656b246f1ad3825a748b78ac8256c0a265cceece"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KMSLJJOBIGX44SXX67E3LDWGTF/bundle.json","state_url":"https://pith.science/pith/KMSLJJOBIGX44SXX67E3LDWGTF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KMSLJJOBIGX44SXX67E3LDWGTF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T06:03:28Z","links":{"resolver":"https://pith.science/pith/KMSLJJOBIGX44SXX67E3LDWGTF","bundle":"https://pith.science/pith/KMSLJJOBIGX44SXX67E3LDWGTF/bundle.json","state":"https://pith.science/pith/KMSLJJOBIGX44SXX67E3LDWGTF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KMSLJJOBIGX44SXX67E3LDWGTF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:KMSLJJOBIGX44SXX67E3LDWGTF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"461631e79734da5e5af6eb58614015c6fe61285d00aa3e34415eae71ee82b715","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-08T17:50:15Z","title_canon_sha256":"17bb8772104084dc225d4714dee66f4d885f7966e4b1abdb0e02a5936f90f450"},"schema_version":"1.0","source":{"id":"2605.08063","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.08063","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"arxiv_version","alias_value":"2605.08063v4","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.08063","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_12","alias_value":"KMSLJJOBIGX4","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_16","alias_value":"KMSLJJOBIGX44SXX","created_at":"2026-05-20T01:05:15Z"},{"alias_kind":"pith_short_8","alias_value":"KMSLJJOB","created_at":"2026-05-20T01:05:15Z"}],"graph_snapshots":[{"event_id":"sha256:5d78e039e9eaf4bbd6d23368656b246f1ad3825a748b78ac8256c0a265cceece","target":"graph","created_at":"2026-05-20T01:05:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Built upon Stable Diffusion 3.5 Medium, Flow-OPD raises the GenEval score from 63 to 92 and the OCR accuracy from 59 to 94, yielding an overall improvement of roughly 10 points over vanilla GRPO, while preserving image fidelity and human-preference alignment and exhibiting an emergent 'teacher-surpassing' effect."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That single-reward GRPO fine-tuning lets each domain-specialized teacher reach its performance ceiling in isolation and that the subsequent three-step orchestration of on-policy sampling, task-routing labeling, and dense supervision can consolidate heterogeneous expertise into one student without reintroducing gradient interference or reward hacking."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Flow-OPD applies on-policy distillation to flow matching models via specialized teachers, cold-start initialization, and manifold anchor regularization, lifting GenEval from 63 to 92 and OCR from 59 to 94 on Stable Diffusion 3.5 Medium."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Flow-OPD trains domain-specialized teachers with single-reward GRPO then distills them into one flow-matching student using on-policy sampling and dense supervision."}],"snapshot_sha256":"750da906f62c4a06351115d3f5725e784c1d6e549f3a954dbf350a17ac110357"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"420c214dac7ec8d6b59e6922ecdb2188f6cc71bf50ce69c47933fdebb7e1c803"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:18.472720Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T11:15:15.189810Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.08063/integrity.json","findings":[],"snapshot_sha256":"a3489b45cd4f188ce9baf5a0b1cd4203888d809151de54fdbb8e0dd99a550e32","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Existing Flow Matching (FM) text-to-image models suffer from two critical bottlenecks under multi-task alignment: the reward sparsity induced by scalar-valued rewards, and the gradient interference arising from jointly optimizing heterogeneous objectives, which together give rise to a 'seesaw effect' of competing metrics and pervasive reward hacking. Inspired by the success of On-Policy Distillation (OPD) in the large language model community, we propose Flow-OPD, the first unified post-training framework that integrates on-policy distillation into Flow Matching models. Flow-OPD adopts a two-s","authors_text":"Feng Zhao, Kaituo Feng, Lin Chen, Shaosheng Cao, Shuang Chen, Wenxuan Huang, Yiming Zhao, Yunlong Lin, Yu Zeng, Zehui Chen, Zhen Fang","cross_cats":["cs.AI"],"headline":"Flow-OPD trains domain-specialized teachers with single-reward GRPO then distills them into one flow-matching student using on-policy sampling and dense supervision.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-08T17:50:15Z","title":"Flow-OPD: On-Policy Distillation for Flow Matching Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.08063","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-15T05:49:13.205127Z","id":"3e42d563-e79e-433e-8ae7-e59fd9ac495a","model_set":{"reader":"grok-4.3"},"one_line_summary":"Flow-OPD applies on-policy distillation to flow matching models via specialized teachers, cold-start initialization, and manifold anchor regularization, lifting GenEval from 63 to 92 and OCR from 59 to 94 on Stable Diffusion 3.5 Medium.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Flow-OPD trains domain-specialized teachers with single-reward GRPO then distills them into one flow-matching student using on-policy sampling and dense supervision.","strongest_claim":"Built upon Stable Diffusion 3.5 Medium, Flow-OPD raises the GenEval score from 63 to 92 and the OCR accuracy from 59 to 94, yielding an overall improvement of roughly 10 points over vanilla GRPO, while preserving image fidelity and human-preference alignment and exhibiting an emergent 'teacher-surpassing' effect.","weakest_assumption":"That single-reward GRPO fine-tuning lets each domain-specialized teacher reach its performance ceiling in isolation and that the subsequent three-step orchestration of on-policy sampling, task-routing labeling, and dense supervision can consolidate heterogeneous expertise into one student without reintroducing gradient interference or reward hacking."}},"verdict_id":"3e42d563-e79e-433e-8ae7-e59fd9ac495a"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:432406c2b396b1e067a1f0d44f18231d32964499e0ed1bdc35d4051386dda318","target":"record","created_at":"2026-05-20T01:05:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"461631e79734da5e5af6eb58614015c6fe61285d00aa3e34415eae71ee82b715","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-08T17:50:15Z","title_canon_sha256":"17bb8772104084dc225d4714dee66f4d885f7966e4b1abdb0e02a5936f90f450"},"schema_version":"1.0","source":{"id":"2605.08063","kind":"arxiv","version":4}},"canonical_sha256":"5324b4a5c141afce4af7f7c9b58ec6994020700309b366174a913882661e3e33","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5324b4a5c141afce4af7f7c9b58ec6994020700309b366174a913882661e3e33","first_computed_at":"2026-05-20T01:05:15.919408Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:15.919408Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kG0AO0h/0QHeqTiyegrFiM5r/c42xrQxtab62MzedIu9tFp+4XUVv8zSfaYY+J2t22mkK4rrecSIWyserakHDg==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:15.920024Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.08063","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:432406c2b396b1e067a1f0d44f18231d32964499e0ed1bdc35d4051386dda318","sha256:5d78e039e9eaf4bbd6d23368656b246f1ad3825a748b78ac8256c0a265cceece"],"state_sha256":"ce556e917868cd2aeb994fce3fd68ad564f4971cf5ccb983d8a35fdcc0df8467"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5eZAGGzn2AXAzzw1GmiG6YE58iNMdCNF+lmiyz74z7RO14/Gs3mvjRofe/Rd1uVc14pRMYQE75EvMCtgV4YzBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T06:03:28.016363Z","bundle_sha256":"3b800d821cc74f6df2f5552e2471578e41b72ec95b3033f22f40c6cfbcf5a99b"}}