{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MSXZA4MQXUP3MC5UVXSIZORBNC","short_pith_number":"pith:MSXZA4MQ","schema_version":"1.0","canonical_sha256":"64af907190bd1fb60bb4ade48cba216891b6b6261f470438bdca6ff9e2006dca","source":{"kind":"arxiv","id":"2605.15792","version":1},"attestation_state":"computed","paper":{"title":"Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Dongliang Chang, Xintong Liu, Yuanchen Fang, Yujun Tong, Zhanyu Ma, Zijin Yin","submitted_at":"2026-05-15T09:48:46Z","abstract_excerpt":"The long-standing goal of multimodal AI is to build unified models in which visual understanding and visual generation mutually enhance one another. Despite recent works such as BAGEL, BLIP3o achieves remarkable progress; In practice, however, this unification remains one-directional: understanding routinely guides generation, yet how and why generation can support understanding is rarely investigated. We revisit this asymmetry and propose Generation-to-Understanding (G2U) synergy, where visual generation becomes an explicit intermediate reasoning step. Our framework enables a model to perform"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.15792","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-15T09:48:46Z","cross_cats_sorted":[],"title_canon_sha256":"1fca55cb03c5ff2f6440d8423ec723c8d44f142fceaaa28e76013c8edc0ac19e","abstract_canon_sha256":"0d0dd8b476ef939c33d81d601c5fcfa476cb13d31796217a69f9b5623083d6f7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:18.600602Z","signature_b64":"c4zeqoi9P+0XpARNbD8tAcaSt63ggczFp4nYXXQMe2/asJepDqqEkY8rGX9JHJoYRJBdUklwoLafyTbmMniHAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"64af907190bd1fb60bb4ade48cba216891b6b6261f470438bdca6ff9e2006dca","last_reissued_at":"2026-05-20T00:01:18.599915Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:18.599915Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Dongliang Chang, Xintong Liu, Yuanchen Fang, Yujun Tong, Zhanyu Ma, Zijin Yin","submitted_at":"2026-05-15T09:48:46Z","abstract_excerpt":"The long-standing goal of multimodal AI is to build unified models in which visual understanding and visual generation mutually enhance one another. Despite recent works such as BAGEL, BLIP3o achieves remarkable progress; In practice, however, this unification remains one-directional: understanding routinely guides generation, yet how and why generation can support understanding is rarely investigated. We revisit this asymmetry and propose Generation-to-Understanding (G2U) synergy, where visual generation becomes an explicit intermediate reasoning step. Our framework enables a model to perform"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.15792","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15792/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T17:33:48.743760Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T17:21:55.911391Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"77ce330c918e9baaa595ed67de2ff1e727a6d224d7d9c6c14fd29ac3a110c5b1"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15792","created_at":"2026-05-20T00:01:18.600009+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15792v1","created_at":"2026-05-20T00:01:18.600009+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15792","created_at":"2026-05-20T00:01:18.600009+00:00"},{"alias_kind":"pith_short_12","alias_value":"MSXZA4MQXUP3","created_at":"2026-05-20T00:01:18.600009+00:00"},{"alias_kind":"pith_short_16","alias_value":"MSXZA4MQXUP3MC5U","created_at":"2026-05-20T00:01:18.600009+00:00"},{"alias_kind":"pith_short_8","alias_value":"MSXZA4MQ","created_at":"2026-05-20T00:01:18.600009+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC","json":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC.json","graph_json":"https://pith.science/api/pith-number/MSXZA4MQXUP3MC5UVXSIZORBNC/graph.json","events_json":"https://pith.science/api/pith-number/MSXZA4MQXUP3MC5UVXSIZORBNC/events.json","paper":"https://pith.science/paper/MSXZA4MQ"},"agent_actions":{"view_html":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC","download_json":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC.json","view_paper":"https://pith.science/paper/MSXZA4MQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15792&json=true","fetch_graph":"https://pith.science/api/pith-number/MSXZA4MQXUP3MC5UVXSIZORBNC/graph.json","fetch_events":"https://pith.science/api/pith-number/MSXZA4MQXUP3MC5UVXSIZORBNC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC/action/storage_attestation","attest_author":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC/action/author_attestation","sign_citation":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC/action/citation_signature","submit_replication":"https://pith.science/pith/MSXZA4MQXUP3MC5UVXSIZORBNC/action/replication_record"}},"created_at":"2026-05-20T00:01:18.600009+00:00","updated_at":"2026-05-20T00:01:18.600009+00:00"}