csukuangfj committed on
Commit
bcc0b4f
·
1 Parent(s): 46930c1

add sense-voice

Browse files
examples.py CHANGED
@@ -32,6 +32,14 @@ examples = [
32
  "Yes",
33
  "./test_wavs/cantonese/2.wav",
34
  ],
 
 
 
 
 
 
 
 
35
  [
36
  "Cantonese",
37
  "zrjin/icefall-asr-mdcc-zipformer-2024-03-11",
@@ -461,4 +469,36 @@ examples = [
461
  "No",
462
  "./test_wavs/thai/2.wav",
463
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  ]
 
32
  "Yes",
33
  "./test_wavs/cantonese/2.wav",
34
  ],
35
+ [
36
+ "Chinese+English+Cantonese+Japanese+Korean",
37
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
38
+ "greedy_search",
39
+ 4,
40
+ "Yes",
41
+ "./test_wavs/sense_voice/yue.wav",
42
+ ],
43
  [
44
  "Cantonese",
45
  "zrjin/icefall-asr-mdcc-zipformer-2024-03-11",
 
469
  "No",
470
  "./test_wavs/thai/2.wav",
471
  ],
472
+ [
473
+ "Chinese+English+Cantonese+Japanese+Korean",
474
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
475
+ "greedy_search",
476
+ 4,
477
+ "Yes",
478
+ "./test_wavs/sense_voice/zh.wav",
479
+ ],
480
+ [
481
+ "Chinese+English+Cantonese+Japanese+Korean",
482
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
483
+ "greedy_search",
484
+ 4,
485
+ "Yes",
486
+ "./test_wavs/sense_voice/en.wav",
487
+ ],
488
+ [
489
+ "Chinese+English+Cantonese+Japanese+Korean",
490
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
491
+ "greedy_search",
492
+ 4,
493
+ "Yes",
494
+ "./test_wavs/sense_voice/ja.wav",
495
+ ],
496
+ [
497
+ "Chinese+English+Cantonese+Japanese+Korean",
498
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
499
+ "greedy_search",
500
+ 4,
501
+ "Yes",
502
+ "./test_wavs/sense_voice/ko.wav",
503
+ ],
504
  ]
model.py CHANGED
@@ -184,6 +184,10 @@ def get_pretrained_model(
184
  return chinese_cantonese_english_models[repo_id](
185
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
186
  )
 
 
 
 
187
  elif repo_id in cantonese_models:
188
  return cantonese_models[repo_id](
189
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
@@ -1294,6 +1298,37 @@ def _get_chinese_dialect_models(
1294
  return recognizer
1295
 
1296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1297
  @lru_cache(maxsize=10)
1298
  def _get_paraformer_pre_trained_model(
1299
  repo_id: str,
@@ -1531,6 +1566,10 @@ chinese_cantonese_english_models = {
1531
  "csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en": _get_streaming_paraformer_zh_yue_en_pre_trained_model,
1532
  }
1533
 
 
 
 
 
1534
  cantonese_models = {
1535
  "zrjin/icefall-asr-mdcc-zipformer-2024-03-11": _get_zrjin_cantonese_pre_trained_model,
1536
  }
@@ -1550,6 +1589,7 @@ all_models = {
1550
  **english_models,
1551
  **chinese_english_mixed_models,
1552
  **chinese_cantonese_english_models,
 
1553
  **cantonese_models,
1554
  # **japanese_models,
1555
  **tibetan_models,
@@ -1567,6 +1607,9 @@ language_to_models = {
1567
  "English": list(english_models.keys()),
1568
  "Chinese+English": list(chinese_english_mixed_models.keys()),
1569
  "Chinese+English+Cantonese": list(chinese_cantonese_english_models.keys()),
 
 
 
1570
  "Cantonese": list(cantonese_models.keys()),
1571
  # "Japanese": list(japanese_models.keys()),
1572
  "Tibetan": list(tibetan_models.keys()),
 
184
  return chinese_cantonese_english_models[repo_id](
185
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
186
  )
187
+ elif repo_id in chinese_cantonese_english_japanese_korean_models:
188
+ return chinese_cantonese_english_japanese_korean_models[repo_id](
189
+ repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
190
+ )
191
  elif repo_id in cantonese_models:
192
  return cantonese_models[repo_id](
193
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
 
1298
  return recognizer
1299
 
1300
 
1301
+ @lru_cache(maxsize=10)
1302
+ def _get_sense_voice_pre_trained_model(
1303
+ repo_id: str,
1304
+ decoding_method: str,
1305
+ num_active_paths: int,
1306
+ ) -> sherpa_onnx.OfflineRecognizer:
1307
+ assert repo_id in [
1308
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
1309
+ ], repo_id
1310
+
1311
+ nn_model = _get_nn_model_filename(
1312
+ repo_id=repo_id,
1313
+ filename="model.int8.onnx",
1314
+ subfolder=".",
1315
+ )
1316
+
1317
+ tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
1318
+
1319
+ recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
1320
+ model=nn_model,
1321
+ tokens=tokens,
1322
+ num_threads=2,
1323
+ sample_rate=sample_rate,
1324
+ feature_dim=80,
1325
+ decoding_method="greedy_search",
1326
+ debug=True,
1327
+ )
1328
+
1329
+ return recognizer
1330
+
1331
+
1332
  @lru_cache(maxsize=10)
1333
  def _get_paraformer_pre_trained_model(
1334
  repo_id: str,
 
1566
  "csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en": _get_streaming_paraformer_zh_yue_en_pre_trained_model,
1567
  }
1568
 
1569
+ chinese_cantonese_english_japanese_korean_models = {
1570
+ "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17": _get_sense_voice_pre_trained_model,
1571
+ }
1572
+
1573
  cantonese_models = {
1574
  "zrjin/icefall-asr-mdcc-zipformer-2024-03-11": _get_zrjin_cantonese_pre_trained_model,
1575
  }
 
1589
  **english_models,
1590
  **chinese_english_mixed_models,
1591
  **chinese_cantonese_english_models,
1592
+ **chinese_cantonese_english_japanese_korean_models,
1593
  **cantonese_models,
1594
  # **japanese_models,
1595
  **tibetan_models,
 
1607
  "English": list(english_models.keys()),
1608
  "Chinese+English": list(chinese_english_mixed_models.keys()),
1609
  "Chinese+English+Cantonese": list(chinese_cantonese_english_models.keys()),
1610
+ "Chinese+English+Cantonese+Japanese+Korean": list(
1611
+ chinese_cantonese_english_japanese_korean_models.keys()
1612
+ ),
1613
  "Cantonese": list(cantonese_models.keys()),
1614
  # "Japanese": list(japanese_models.keys()),
1615
  "Tibetan": list(tibetan_models.keys()),
requirements.txt CHANGED
@@ -12,4 +12,4 @@ huggingface_hub
12
 
13
  #https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-1.9.26-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
14
 
15
- sherpa-onnx>=1.9.21
 
12
 
13
  #https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-1.9.26-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
14
 
15
+ sherpa-onnx>=1.10.17
test_wavs/sense_voice/en.wav ADDED
Binary file (229 kB). View file
 
test_wavs/sense_voice/ja.wav ADDED
Binary file (230 kB). View file
 
test_wavs/sense_voice/ko.wav ADDED
Binary file (148 kB). View file
 
test_wavs/sense_voice/yue.wav ADDED
Binary file (165 kB). View file
 
test_wavs/sense_voice/zh.wav ADDED
Binary file (179 kB). View file