{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:FYDMEK4T3Q5WOKQXCB76FABIB2","short_pith_number":"pith:FYDMEK4T","schema_version":"1.0","canonical_sha256":"2e06c22b93dc3b672a17107fe280280ebbcce608148446ea7d5a122d11351cfa","source":{"kind":"arxiv","id":"2605.16638","version":1},"attestation_state":"computed","paper":{"title":"TTE-Flash: Accelerating Reasoning-based Multimodal Representations via Think-Then-Embed Tokens","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chaitanya Ahuja, Fan Xia, Hanchao Yu, Jiangfan Zhang, Jianpeng Cheng, Jun Xiao, Qi Guo, Shaodan Zhai, Shlok Kumar Mishra, Wentao Bao, Xiangjun Fan, Xian Wu, Yang Gao","submitted_at":"2026-05-15T21:10:56Z","abstract_excerpt":"Recent research has demonstrated that Universal Multimodal Embedding (UME) benefits significantly from Chain-of-Thought (CoT) reasoning. In this paradigm, a generative model produces explicit reasoning traces for a multimodal query, with the final representation extracted from an <eos> embedding token attending to both the query and the reasoning. Despite its effectiveness, the computational overhead of generating explicit CoT traces is often prohibitive. In this work, we propose replacing explicit CoT with latent think tokens, which are interpreted as latent variables that can produce explici"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.16638","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-15T21:10:56Z","cross_cats_sorted":[],"title_canon_sha256":"f2b2c9a6295a9fdb11d6f0a001cac1e49844a4f5d1b3de9e7de2f258a10d8cea","abstract_canon_sha256":"a6dc791d612335d251eb21a6f7556782492bede35c2dcfb41ca6e78ea6273da3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:33.809425Z","signature_b64":"xPWtbj7p/PbH+XnxEKjDIx9zcZjuZeKHS52MpnKn0pR7i+M7WxgHXDWIWcy7G26yT3J0SV3heaDT5SMXGzu9AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e06c22b93dc3b672a17107fe280280ebbcce608148446ea7d5a122d11351cfa","last_reissued_at":"2026-05-20T00:02:33.808539Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:33.808539Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TTE-Flash: Accelerating Reasoning-based Multimodal Representations via Think-Then-Embed Tokens","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chaitanya Ahuja, Fan Xia, Hanchao Yu, Jiangfan Zhang, Jianpeng Cheng, Jun Xiao, Qi Guo, Shaodan Zhai, Shlok Kumar Mishra, Wentao Bao, Xiangjun Fan, Xian Wu, Yang Gao","submitted_at":"2026-05-15T21:10:56Z","abstract_excerpt":"Recent research has demonstrated that Universal Multimodal Embedding (UME) benefits significantly from Chain-of-Thought (CoT) reasoning. In this paradigm, a generative model produces explicit reasoning traces for a multimodal query, with the final representation extracted from an <eos> embedding token attending to both the query and the reasoning. Despite its effectiveness, the computational overhead of generating explicit CoT traces is often prohibitive. In this work, we propose replacing explicit CoT with latent think tokens, which are interpreted as latent variables that can produce explici"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16638","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16638/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T19:01:56.415324Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.576647Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"c90007a5483380919a612f7023927f931e062cc2cb33515f53d066a473966d3f"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.16638","created_at":"2026-05-20T00:02:33.808684+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.16638v1","created_at":"2026-05-20T00:02:33.808684+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16638","created_at":"2026-05-20T00:02:33.808684+00:00"},{"alias_kind":"pith_short_12","alias_value":"FYDMEK4T3Q5W","created_at":"2026-05-20T00:02:33.808684+00:00"},{"alias_kind":"pith_short_16","alias_value":"FYDMEK4T3Q5WOKQX","created_at":"2026-05-20T00:02:33.808684+00:00"},{"alias_kind":"pith_short_8","alias_value":"FYDMEK4T","created_at":"2026-05-20T00:02:33.808684+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2","json":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2.json","graph_json":"https://pith.science/api/pith-number/FYDMEK4T3Q5WOKQXCB76FABIB2/graph.json","events_json":"https://pith.science/api/pith-number/FYDMEK4T3Q5WOKQXCB76FABIB2/events.json","paper":"https://pith.science/paper/FYDMEK4T"},"agent_actions":{"view_html":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2","download_json":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2.json","view_paper":"https://pith.science/paper/FYDMEK4T","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.16638&json=true","fetch_graph":"https://pith.science/api/pith-number/FYDMEK4T3Q5WOKQXCB76FABIB2/graph.json","fetch_events":"https://pith.science/api/pith-number/FYDMEK4T3Q5WOKQXCB76FABIB2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2/action/storage_attestation","attest_author":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2/action/author_attestation","sign_citation":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2/action/citation_signature","submit_replication":"https://pith.science/pith/FYDMEK4T3Q5WOKQXCB76FABIB2/action/replication_record"}},"created_at":"2026-05-20T00:02:33.808684+00:00","updated_at":"2026-05-20T00:02:33.808684+00:00"}