{"work":{"id":"5d8d3efd-bb5b-4a30-8457-28f190c026e9","openalex_id":null,"doi":null,"arxiv_id":"2309.07597","raw_key":null,"title":"C-Pack: Packed Resources For General Chinese Embeddings","authors":null,"authors_text":"Shitao Xiao, Zheng Liu, Peitian Zhang, Niklas Muennighoff, Defu Lian, Jian-Yun Nie","year":2023,"venue":"cs.CL","abstract":"We introduce C-Pack, a package of resources that significantly advance the field of general Chinese embeddings. C-Pack includes three critical resources. 1) C-MTEB is a comprehensive benchmark for Chinese text embeddings covering 6 tasks and 35 datasets. 2) C-MTP is a massive text embedding dataset curated from labeled and unlabeled Chinese corpora for training embedding models. 3) C-TEM is a family of embedding models covering multiple sizes. Our models outperform all prior Chinese text embeddings on C-MTEB by up to +10% upon the time of the release. We also integrate and optimize the entire suite of training methods for C-TEM. Along with our resources on general Chinese embedding, we release our data and models for English text embeddings. The English models achieve state-of-the-art performance on MTEB benchmark; meanwhile, our released English data is 2 times larger than the Chinese data. All these resources are made publicly available at https://github.com/FlagOpen/FlagEmbedding.","external_url":"https://arxiv.org/abs/2309.07597","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T04:25:19.213474+00:00","pith_arxiv_id":"2309.07597","created_at":"2026-05-10T08:02:25.163120+00:00","updated_at":"2026-05-25T04:25:19.213474+00:00","title_quality_ok":true,"display_title":"C-Pack: Packed Resources For General Chinese Embeddings","render_title":"C-Pack: Packed Resources For General Chinese Embeddings"},"hub":{"state":{"work_id":"5d8d3efd-bb5b-4a30-8457-28f190c026e9","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":47,"external_cited_by_count":null,"distinct_field_count":10,"first_pith_cited_at":"2024-01-07T18:12:20+00:00","last_pith_cited_at":"2026-05-22T13:25:13+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-04T15:57:45.558323+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":6},{"context_role":"baseline","n":1},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":6},{"context_polarity":"baseline","n":1},{"context_polarity":"use_method","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}