csukuangfj committed
Commit 3b63443 · 1 Parent(s): c07a3d1

minor fixes

Files changed (2)
  1. app.py +12 -9
  2. model.py +22 -21
app.py CHANGED

@@ -89,15 +89,18 @@ def process_uploaded_file(
             "result_item_error",
         )
 
-    try:
-        input_threshold = float(input_threshold)
-        if input_threshold < 0 or input_threshold < 10:
-            raise ValueError("")
-    except ValueError:
-        return "", build_html_output(
-            "Please set a valid threshold between (0, 10)",
-            "result_item_error",
-        )
+    if input_num_speakers < 0:
+        try:
+            input_threshold = float(input_threshold)
+            if input_threshold < 0 or input_threshold > 10:
+                raise ValueError("")
+        except ValueError:
+            return "", build_html_output(
+                "Please set a valid threshold between (0, 10)",
+                "result_item_error",
+            )
+    else:
+        input_threshold = 0
 
     MyPrint(f"Processing uploaded file: {in_filename}")
     try:
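Note: besides gating the check on input_num_speakers, this hunk also fixes a bug in the range test, which previously read input_threshold < 10 and so rejected every threshold below 10. A minimal sketch of the resulting logic, pulled out into a hypothetical standalone helper (not part of app.py) for illustration:

# Hypothetical helper mirroring the validation in the hunk above.
def resolve_threshold(input_num_speakers: int, input_threshold: str) -> float:
    if input_num_speakers < 0:
        # Speaker count unknown: the clustering threshold must be in (0, 10).
        value = float(input_threshold)  # raises ValueError on non-numeric input
        if value < 0 or value > 10:
            raise ValueError("Please set a valid threshold between (0, 10)")
        return value
    # Speaker count given explicitly: the threshold is unused downstream,
    # so normalize it to 0.
    return 0.0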
model.py CHANGED

@@ -71,6 +71,7 @@ def get_speaker_segmentation_model(repo_id) -> List[str]:
 
 
 def get_speaker_embedding_model(model_name) -> List[str]:
+    model_name = model_name.split("|")[0]
     assert (
         model_name
        in three_d_speaker_embedding_models
@@ -117,32 +118,32 @@ speaker_segmentation_models = ["pyannote/segmentation-3.0"]
 
 
 nemo_speaker_embedding_models = [
-    "nemo_en_speakerverification_speakernet.onnx",
-    "nemo_en_titanet_large.onnx",
-    "nemo_en_titanet_small.onnx",
+    "nemo_en_speakerverification_speakernet.onnx|22MB",
+    "nemo_en_titanet_large.onnx|97MB",
+    "nemo_en_titanet_small.onnx|38MB",
 ]
 
 three_d_speaker_embedding_models = [
-    "3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx",
-    "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx",
-    "3dspeaker_speech_campplus_sv_zh_en_16k-common_advanced.onnx",
-    "3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx",
-    "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx",
-    "3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
-    "3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx",
-    "3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx",
-    "3dspeaker_speech_eres2netv2_sv_zh-cn_16k-common.onnx",
+    "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx|37.8MB",
+    "3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx|28.2MB",
+    "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx|27MB",
+    "3dspeaker_speech_campplus_sv_zh_en_16k-common_advanced.onnx|27MB",
+    "3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx|37.8MB",
+    "3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx|111MB",
+    "3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx|25.3MB",
+    "3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx|210MB",
+    "3dspeaker_speech_eres2netv2_sv_zh-cn_16k-common.onnx|68.1MB",
 ]
 wespeaker_embedding_models = [
-    "wespeaker_en_voxceleb_CAM++.onnx",
-    "wespeaker_en_voxceleb_CAM++_LM.onnx",
-    "wespeaker_en_voxceleb_resnet152_LM.onnx",
-    "wespeaker_en_voxceleb_resnet221_LM.onnx",
-    "wespeaker_en_voxceleb_resnet293_LM.onnx",
-    "wespeaker_en_voxceleb_resnet34.onnx",
-    "wespeaker_en_voxceleb_resnet34_LM.onnx",
-    "wespeaker_zh_cnceleb_resnet34.onnx",
-    "wespeaker_zh_cnceleb_resnet34_LM.onnx",
+    "wespeaker_en_voxceleb_CAM++.onnx|28MB",
+    "wespeaker_en_voxceleb_CAM++_LM.onnx|28MB",
+    "wespeaker_en_voxceleb_resnet152_LM.onnx|76MB",
+    "wespeaker_en_voxceleb_resnet221_LM.onnx|91MB",
+    "wespeaker_en_voxceleb_resnet293_LM.onnx|110MB",
+    "wespeaker_en_voxceleb_resnet34.onnx|26MB",
+    "wespeaker_en_voxceleb_resnet34_LM.onnx|26MB",
+    "wespeaker_zh_cnceleb_resnet34.onnx|26MB",
+    "wespeaker_zh_cnceleb_resnet34_LM.onnx|26MB",
 ]
 
 embedding2models = {
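Note: each embedding model name now carries a human-readable download size after a "|" separator, presumably so the UI dropdown can show how large each choice is, and get_speaker_embedding_model strips that suffix before using the name, since the suffixed string is what comes back from the UI. A minimal sketch of the round trip, assuming the dropdown returns the list entries verbatim:

# Assumed entry shape after this commit: "<file>.onnx|<size>".
entry = "nemo_en_titanet_large.onnx|97MB"

# Mirrors the new first line of get_speaker_embedding_model():
model_name = entry.split("|")[0]
assert model_name == "nemo_en_titanet_large.onnx"

# Names without a "|" pass through unchanged, so the stripping step is
# harmless for un-suffixed inputs.
assert "plain.onnx".split("|")[0] == "plain.onnx"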