{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:NDJPKY322GAKH3VG5AOJTLCDTR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a7f6c5b6f45a7ac779b7dfe74fc2db9c77e236c79712c3492386cf5ca706a101","cross_cats_sorted":["cs.AI","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-05-27T17:59:45Z","title_canon_sha256":"9cc3182331acfb590da2194415353ff9e9f16aba89d9ab2ec357db5a469d6304"},"schema_version":"1.0","source":{"id":"2405.17428","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2405.17428","created_at":"2026-05-17T23:39:21Z"},{"alias_kind":"arxiv_version","alias_value":"2405.17428v3","created_at":"2026-05-17T23:39:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2405.17428","created_at":"2026-05-17T23:39:21Z"},{"alias_kind":"pith_short_12","alias_value":"NDJPKY322GAK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"NDJPKY322GAKH3VG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"NDJPKY32","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:cb7c5c4eeeaf0034f8ec7e3fc0ac4578ceddd88d63d2e29308e2816577228ff9","target":"graph","created_at":"2026-05-17T23:39:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"By combining the latent attention layer, removal of the causal attention mask, two-stage contrastive instruction-tuning, and curated datasets including hard negatives and synthetic data, NV-Embed-v1 and NV-Embed-v2 obtain the No.1 position on the MTEB leaderboard across 56 tasks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the reported gains stem primarily from the proposed architectural and procedural changes rather than from larger training compute, model scale, or the specific choice of public datasets alone."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"NV-Embed achieves first place on the MTEB leaderboard across 56 tasks by combining a latent attention layer, causal-mask removal, two-stage contrastive training, and data curation for LLM-based embedding models."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Decoder-only LLMs outperform BERT and T5 embedding models on general tasks by using a latent attention layer, removing causal masks, and applying two-stage contrastive instruction tuning."}],"snapshot_sha256":"f1ab2dcb0c51c1ec49b3c4689c6ae8304e36e3fb919d3adfc495ab4d976e4231"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"3cff1c009108c745061a6a45e3c7f15d5b37baa9e2f32f96db14ae0a05958b9f"},"paper":{"abstract_excerpt":"Decoder-only LLM-based embedding models are beginning to outperform BERT or T5-based embedding models in general-purpose text embedding tasks, including dense vector-based retrieval. In this work, we introduce NV-Embed, incorporating architectural designs, training procedures, and curated datasets to significantly enhance the performance of LLM as a versatile embedding model, while maintaining its simplicity and reproducibility. For model architecture, we propose a latent attention layer to obtain pooled embeddings, which consistently improves retrieval and downstream task accuracy compared to","authors_text":"Bryan Catanzaro, Chankyu Lee, Jonathan Raiman, Mengyao Xu, Mohammad Shoeybi, Rajarshi Roy, Wei Ping","cross_cats":["cs.AI","cs.IR","cs.LG"],"headline":"Decoder-only LLMs outperform BERT and T5 embedding models on general tasks by using a latent attention layer, removing causal masks, and applying two-stage contrastive instruction tuning.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-05-27T17:59:45Z","title":"NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models"},"references":{"count":121,"internal_anchors":22,"resolved_work":121,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Adams, Daniel Borkan, Jeffrey Sorensen, Lucas Dixon, Lucy Vasserman, and Nithum Thain","work_id":"e6b82b89-83d2-4785-afe4-51851d731321","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"S em E val-2012 task 6: A pilot on semantic textual similarity","work_id":"1d6ab256-a6da-4115-a303-7ceb21fa334f","year":2012},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"Language models are few-shot learners","work_id":"b5af3a68-2622-4421-b39b-b1d2fbde2d8d","year":1901},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":7,"title":"Efficient intent detection with dual sentence encoders","work_id":"d963b119-021b-48ee-9acb-554b5e402977","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":9,"title":"Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation, 2023","work_id":"99655c36-3038-4267-abae-eb9bd7978726","year":2023}],"snapshot_sha256":"cc1ec95389852ac9f876b6121a7610ff83fcc6a61de4400535acb6d940aeb710"},"source":{"id":"2405.17428","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-14T21:10:16.373016Z","id":"bb8cdfa5-7843-4e4a-b6e4-918cf5084467","model_set":{"reader":"grok-4.3"},"one_line_summary":"NV-Embed achieves first place on the MTEB leaderboard across 56 tasks by combining a latent attention layer, causal-mask removal, two-stage contrastive training, and data curation for LLM-based embedding models.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Decoder-only LLMs outperform BERT and T5 embedding models on general tasks by using a latent attention layer, removing causal masks, and applying two-stage contrastive instruction tuning.","strongest_claim":"By combining the latent attention layer, removal of the causal attention mask, two-stage contrastive instruction-tuning, and curated datasets including hard negatives and synthetic data, NV-Embed-v1 and NV-Embed-v2 obtain the No.1 position on the MTEB leaderboard across 56 tasks.","weakest_assumption":"That the reported gains stem primarily from the proposed architectural and procedural changes rather than from larger training compute, model scale, or the specific choice of public datasets alone."}},"verdict_id":"bb8cdfa5-7843-4e4a-b6e4-918cf5084467"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:988ee8ec7b20a22531627d8d90a5836c79076eda660bea56de84021e0614d1d8","target":"record","created_at":"2026-05-17T23:39:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a7f6c5b6f45a7ac779b7dfe74fc2db9c77e236c79712c3492386cf5ca706a101","cross_cats_sorted":["cs.AI","cs.IR","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-05-27T17:59:45Z","title_canon_sha256":"9cc3182331acfb590da2194415353ff9e9f16aba89d9ab2ec357db5a469d6304"},"schema_version":"1.0","source":{"id":"2405.17428","kind":"arxiv","version":3}},"canonical_sha256":"68d2f5637ad180a3eea6e81c99ac439c63e2cd81aab074ba9ac5be8730b06582","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"68d2f5637ad180a3eea6e81c99ac439c63e2cd81aab074ba9ac5be8730b06582","first_computed_at":"2026-05-17T23:39:21.658359Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:21.658359Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jjecsRnGNnqB4QE7QsqeHcAhmHOes2D2DqjY3ZRSGkcrLo6fgjAXYbOJ7wpkQWdwW7t81DEu4fNpCkmWXmN9Dg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:21.659039Z","signed_message":"canonical_sha256_bytes"},"source_id":"2405.17428","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:988ee8ec7b20a22531627d8d90a5836c79076eda660bea56de84021e0614d1d8","sha256:cb7c5c4eeeaf0034f8ec7e3fc0ac4578ceddd88d63d2e29308e2816577228ff9"],"state_sha256":"6dfd57d9f8d0b77e677166bdc3ba2b4c62a0a6293234453c9630d2990892c44f"}