{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GCGY5GYL2WNFEH7SUV3CPVEPVC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"396849b50901b5c2d18b42f454bf27b4f63b4e4caa7b19452aa641d8a0379828","cross_cats_sorted":["cs.AI","cs.MM"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-02T13:59:39Z","title_canon_sha256":"a3a004a856edd2a7c835034529bccf638be46badc1f9285774d0bc71a3b3d631"},"schema_version":"1.0","source":{"id":"2602.07026","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.07026","created_at":"2026-06-08T01:03:58Z"},{"alias_kind":"arxiv_version","alias_value":"2602.07026v3","created_at":"2026-06-08T01:03:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.07026","created_at":"2026-06-08T01:03:58Z"},{"alias_kind":"pith_short_12","alias_value":"GCGY5GYL2WNF","created_at":"2026-06-08T01:03:58Z"},{"alias_kind":"pith_short_16","alias_value":"GCGY5GYL2WNFEH7S","created_at":"2026-06-08T01:03:58Z"},{"alias_kind":"pith_short_8","alias_value":"GCGY5GYL","created_at":"2026-06-08T01:03:58Z"}],"graph_snapshots":[{"event_id":"sha256:9fb48b6d0d89db8e67d7abc1da1d052adfb08c86f036a558672a01cba54e2c19","target":"graph","created_at":"2026-06-08T01:03:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ReAlign, a training-free three-step procedure (Anchor, Trace, Centroid Alignment) that uses statistics from massive unpaired data, explicitly rectifies geometric misalignment so that unpaired text can substitute for paired image-text data in MLLM pretraining."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The Fixed-frame Modality Gap Theory assumes that the decomposition into stable biases and anisotropic residuals remains valid when the reference frame is frozen and that the statistics computed from unpaired data accurately capture the target image distribution without introducing new distortions."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ReAlign corrects the modality gap in unpaired data to let MLLMs learn visual distributions from text alone before instruction tuning, reducing dependence on expensive paired corpora."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"ReAlign aligns text embeddings to image distributions via a training-free three-step process using unpaired data, letting MLLMs pretrain without paired image-text examples."}],"snapshot_sha256":"cad15353c14c8479783c40301ad97efd885cb51fa026216fe79707e44a091861"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"2972064ae0599586b3ab7db6127b910b08909e25048ddafe049462bbdefebb14"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.07026/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Despite the success of multimodal contrastive learning in aligning visual and linguistic representations, a persistent geometric anomaly, the Modality Gap, remains: embeddings of distinct modalities expressing identical semantics occupy systematically offset regions. Prior approaches to bridge this gap are largely limited by oversimplified isotropic assumptions, hindering their application in large-scale scenarios. In this paper, we address these limitations by precisely characterizing the geometric shape of the modality gap and leveraging it for efficient model scaling. First, we propose the ","authors_text":"Chengwei Qin, Chen Liu, Chonghan Liu, Hanzhen Zhao, Hao Tang, Hui Xiong, Shuicheng Yan, Wenjie Zhang, Xiaobin Hu, Xiaomin Yu, Xiaoxing Hu, Yi Xin, Yuhui Zhang, Yu Qiao, Ziyue Qiao","cross_cats":["cs.AI","cs.MM"],"headline":"ReAlign aligns text embeddings to image distributions via a training-free three-step process using unpaired data, letting MLLMs pretrain without paired image-text examples.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-02T13:59:39Z","title":"Modality Gap-Driven Subspace Alignment Training Paradigm For Multimodal Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.07026","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-16T08:15:12.284635Z","id":"9468bec5-563c-4687-9028-109af0a86830","model_set":{"reader":"grok-4.3"},"one_line_summary":"ReAlign corrects the modality gap in unpaired data to let MLLMs learn visual distributions from text alone before instruction tuning, reducing dependence on expensive paired corpora.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"ReAlign aligns text embeddings to image distributions via a training-free three-step process using unpaired data, letting MLLMs pretrain without paired image-text examples.","strongest_claim":"ReAlign, a training-free three-step procedure (Anchor, Trace, Centroid Alignment) that uses statistics from massive unpaired data, explicitly rectifies geometric misalignment so that unpaired text can substitute for paired image-text data in MLLM pretraining.","weakest_assumption":"The Fixed-frame Modality Gap Theory assumes that the decomposition into stable biases and anisotropic residuals remains valid when the reference frame is frozen and that the statistics computed from unpaired data accurately capture the target image distribution without introducing new distortions."}},"verdict_id":"9468bec5-563c-4687-9028-109af0a86830"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ae39e142ebfb70950045fd0421c6e2fb6bd7deaf75447d56f6c08ba2d4e4259c","target":"record","created_at":"2026-06-08T01:03:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"396849b50901b5c2d18b42f454bf27b4f63b4e4caa7b19452aa641d8a0379828","cross_cats_sorted":["cs.AI","cs.MM"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-02T13:59:39Z","title_canon_sha256":"a3a004a856edd2a7c835034529bccf638be46badc1f9285774d0bc71a3b3d631"},"schema_version":"1.0","source":{"id":"2602.07026","kind":"arxiv","version":3}},"canonical_sha256":"308d8e9b0bd59a521ff2a57627d48fa89f245c840d23165effe84d74f3930f9e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"308d8e9b0bd59a521ff2a57627d48fa89f245c840d23165effe84d74f3930f9e","first_computed_at":"2026-06-08T01:03:58.232218Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:03:58.232218Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"YNShjXCeCwHdKyQFKlGddnG7wj3J6lyaYnAk1qlCQKzfKmIZ1/7wxnR1DbNdtpIHb75E6QZDvDqAyA+qtpj2Bg==","signature_status":"signed_v1","signed_at":"2026-06-08T01:03:58.233186Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.07026","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ae39e142ebfb70950045fd0421c6e2fb6bd7deaf75447d56f6c08ba2d4e4259c","sha256:9fb48b6d0d89db8e67d7abc1da1d052adfb08c86f036a558672a01cba54e2c19"],"state_sha256":"664cd6e0393d7da53be2819398d488d99d51841af5b34ea6b2de7295e18a35af"}