{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YPHKM4EJRCZ5UJCQL2UWL3Y2P2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"405fe0f1b8407cb4324f8cb7cfe2a184ddbb68ca88a963435733d0569cc84b45","cross_cats_sorted":["cs.CL","cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T06:41:53Z","title_canon_sha256":"81f20f7a08c7da0115639609c88d140d5c2e5c032065544a9c24fb5f14f2b914"},"schema_version":"1.0","source":{"id":"2605.14448","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14448","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14448v1","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14448","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"pith_short_12","alias_value":"YPHKM4EJRCZ5","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"YPHKM4EJRCZ5UJCQ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"YPHKM4EJ","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:0b9ec87f906ede7069b5e30cd3d86b2a5cb1ec2b9b254d5900b1638ed0c4a185","target":"graph","created_at":"2026-05-17T23:39:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"On the 78 tasks of MMEB-V2, TWN achieves state-of-the-art embedding quality while being substantially more efficient than existing generative methods, requiring only 3-5% additional parameters relative to the backbone and up to 50% fewer reasoning tokens compared to the full generative mode."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The self-supervised routing gate accurately identifies inputs where reasoning is unnecessary or harmful, and that detaching gradients at the LoRA interface fully resolves optimization conflicts without introducing new biases in the learned adapters."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"TWN attaches separate reasoning and embedding LoRA adapters to a frozen backbone with gradient detachment and a self-supervised gate that decides per input whether to generate CoT, achieving SOTA on MMEB-V2 with 3-5% added parameters and up to 50% fewer reasoning tokens."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A dual-LoRA architecture with a routing gate lets multimodal embeddings add chain-of-thought reasoning only when it improves results."}],"snapshot_sha256":"77c02bf4f1589215c9f264e4c4b7c32a873e0f5adebab55979fcceb7480ee781"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Multimodal large language models (MLLMs) have emerged as a powerful backbone for multimodal embeddings. Recent methods introduce chain-of-thought (CoT) reasoning into the embedding pipeline to improve retrieval quality, but remain costly in both model size and inference cost. They typically employ separate reasoner and embedder with substantial parameter overhead, and generate CoT indiscriminately for every input. However, we observe that for simple inputs, discriminative embeddings already perform well, and redundant reasoning can even mislead the model, degrading performance. To address thes","authors_text":"Guanghao Zhang, Hao Jiang, Longxiang Zhang, Pipei Huang, Weilong Dai","cross_cats":["cs.CL","cs.IR"],"headline":"A dual-LoRA architecture with a routing gate lets multimodal embeddings add chain-of-thought reasoning only when it improves results.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T06:41:53Z","title":"Think When Needed: Adaptive Reasoning-Driven Multimodal Embeddings with a Dual-LoRA Architecture"},"references":{"count":43,"internal_anchors":7,"resolved_work":43,"sample":[{"cited_arxiv_id":"2511.21631","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Llm2vec: Large language models are secretly powerful text encoders","work_id":"156e1320-54cd-416f-af15-d9da54374957","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Internvl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks","work_id":"75a0de0b-0e1f-4038-aad8-a85626ae5442","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Think then embed: Generative con- text improves multimodal embedding","work_id":"2a83d4ce-5ef7-4aac-bff2-d4fa50e611d9","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Flashattention-2: Faster attention with better parallelism and work partitioning","work_id":"0f89dfdc-a347-49f7-87ff-cec69596f875","year":2024}],"snapshot_sha256":"b4d40bf86940a84c8e93c36f2525d237dc0200c865aec3c4b46b6b03794ed1a4"},"source":{"id":"2605.14448","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T02:48:12.288585Z","id":"b3b4b0c4-c7ac-4ffc-8a40-a2464d7f8ee9","model_set":{"reader":"grok-4.3"},"one_line_summary":"TWN attaches separate reasoning and embedding LoRA adapters to a frozen backbone with gradient detachment and a self-supervised gate that decides per input whether to generate CoT, achieving SOTA on MMEB-V2 with 3-5% added parameters and up to 50% fewer reasoning tokens.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A dual-LoRA architecture with a routing gate lets multimodal embeddings add chain-of-thought reasoning only when it improves results.","strongest_claim":"On the 78 tasks of MMEB-V2, TWN achieves state-of-the-art embedding quality while being substantially more efficient than existing generative methods, requiring only 3-5% additional parameters relative to the backbone and up to 50% fewer reasoning tokens compared to the full generative mode.","weakest_assumption":"The self-supervised routing gate accurately identifies inputs where reasoning is unnecessary or harmful, and that detaching gradients at the LoRA interface fully resolves optimization conflicts without introducing new biases in the learned adapters."}},"verdict_id":"b3b4b0c4-c7ac-4ffc-8a40-a2464d7f8ee9"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:90ca819e688980ce2f7e271bea2d94557024ae6aa8b99dc088fafd008dd64b34","target":"record","created_at":"2026-05-17T23:39:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"405fe0f1b8407cb4324f8cb7cfe2a184ddbb68ca88a963435733d0569cc84b45","cross_cats_sorted":["cs.CL","cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T06:41:53Z","title_canon_sha256":"81f20f7a08c7da0115639609c88d140d5c2e5c032065544a9c24fb5f14f2b914"},"schema_version":"1.0","source":{"id":"2605.14448","kind":"arxiv","version":1}},"canonical_sha256":"c3cea6708988b3da24505ea965ef1a7ea68849a2dfc53da52becc64b1d2f27aa","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c3cea6708988b3da24505ea965ef1a7ea68849a2dfc53da52becc64b1d2f27aa","first_computed_at":"2026-05-17T23:39:06.927990Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:06.927990Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OsmdW2PUF+PhCFUcqE/scLKXCED2+bF8bSin+PLuDwoxu1FeP5eBXSvz+Qz3C/3Mfc9EI4bd64GXxjbnTM6TDA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:06.928796Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14448","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:90ca819e688980ce2f7e271bea2d94557024ae6aa8b99dc088fafd008dd64b34","sha256:0b9ec87f906ede7069b5e30cd3d86b2a5cb1ec2b9b254d5900b1638ed0c4a185"],"state_sha256":"8f4f3ea2ee61a2e1cfda444238bcb526c8a5240771c952252650d3f0367ad8c5"}