{"work":{"id":"123cad45-dfd0-4b02-a99a-47d962dc2ab4","openalex_id":null,"doi":null,"arxiv_id":"1808.10583","raw_key":null,"title":"AISHELL-2: Transforming Mandarin ASR Research Into Industrial Scale","authors":null,"authors_text":"J","year":2018,"venue":"cs.CL","abstract":"AISHELL-1 is by far the largest open-source speech corpus available for Mandarin speech recognition research. It was released with a baseline system containing solid training and testing pipelines for Mandarin ASR. In AISHELL-2, 1000 hours of clean read-speech data from iOS is published, which is free for academic usage. On top of AISHELL-2 corpus, an improved recipe is developed and released, containing key components for industrial applications, such as Chinese word segmentation, flexible vocabulary expension and phone set transformation etc. Pipelines support various state-of-the-art techniques, such as time-delayed neural networks and Lattic-Free MMI objective funciton. In addition, we also release dev and test data from other channels(Android and Mic). For research community, we hope that AISHELL-2 corpus can be a solid resource for topics like transfer learning and robust ASR. \nFor industry, we hope AISHELL-2 recipe can be a helpful reference for building meaningful industrial systems and products.","external_url":"https://arxiv.org/abs/1808.10583","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T02:55:16.449358+00:00","pith_arxiv_id":"1808.10583","created_at":"2026-05-11T02:14:45.464113+00:00","updated_at":"2026-05-25T02:55:16.449358+00:00","title_quality_ok":true,"display_title":"Aishell-2: Transform- ing mandarin asr research into industrial scale","render_title":"Aishell-2: Transform- ing mandarin asr research into industrial scale"},"hub":{"state":{"work_id":"123cad45-dfd0-4b02-a99a-47d962dc2ab4","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":10,"external_cited_by_count":null,"distinct_field_count":3,"first_pith_cited_at":"2023-11-14T05:34:50+00:00","last_pith_cited_at":"2026-05-22T10:24:50+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-30T12:11:19.942150+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"dataset","n":4},{"context_role":"background","n":2}],"polarity_counts":[{"context_polarity":"use_dataset","n":4},{"context_polarity":"background","n":1},{"context_polarity":"unclear","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}