Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -29,8 +29,7 @@ from transformers import Wav2Vec2ProcessorWithLM
|
|
29 |
# from waveletDenoise import wavelet_denoise
|
30 |
from scipy.signal import butter, lfilter, wiener
|
31 |
|
32 |
-
|
33 |
-
asr_model_kannada = pipeline("automatic-speech-recognition", model="cdactvm/w2v_bert_kannada_030125")
|
34 |
|
35 |
def createlex(filename):
|
36 |
#filename = "num_map.txt"
|
@@ -228,41 +227,28 @@ def apply_wiener_filter(audio):
|
|
228 |
|
229 |
|
230 |
# Function to handle speech recognition
|
231 |
-
def
|
232 |
audio, sr = librosa.load(audio_file, sr=16000)
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
result = asr_model_telugu(audio)
|
238 |
text_value = result['text']
|
239 |
-
|
240 |
-
|
241 |
-
converted_text=
|
242 |
-
|
243 |
-
# converted_to_list = convert_to_list(cleaned_text, text_to_list())
|
244 |
-
# processed_doubles = process_doubles(converted_to_list)
|
245 |
-
# replaced_words = replace_words(processed_doubles)
|
246 |
-
# converted_text = text_to_int(replaced_words)
|
247 |
-
return cleaned_text +" -----------------> " + converted_text
|
248 |
-
#return cleaned_text
|
249 |
|
250 |
# Function to handle speech recognition
|
251 |
-
def
|
252 |
audio, sr = librosa.load(audio_file, sr=16000)
|
253 |
audio = high_pass_filter(audio, sr)
|
254 |
audio = apply_wiener_filter(audio)
|
255 |
denoised_audio = wavelet_denoise(audio)
|
256 |
-
result =
|
257 |
text_value = result['text']
|
258 |
cleaned_text = text_value.replace("[UNK]", "")
|
259 |
converted_text=convert2numkn(cleaned_text,kanlex)
|
260 |
-
#converted_text=convert2num(cleaned_text,lex)
|
261 |
-
# cleaned_text=convert2num(cleaned_text,lex)
|
262 |
-
# converted_to_list = convert_to_list(cleaned_text, text_to_list())
|
263 |
-
# processed_doubles = process_doubles(converted_to_list)
|
264 |
-
# replaced_words = replace_words(processed_doubles)
|
265 |
-
# converted_text = text_to_int(replaced_words)
|
266 |
return cleaned_text +" -----------------> " + converted_text
|
267 |
|
268 |
def sel_lng(lng, mic=None, file=None):
|
@@ -273,22 +259,18 @@ def sel_lng(lng, mic=None, file=None):
|
|
273 |
else:
|
274 |
return "You must either provide a mic recording or a file"
|
275 |
|
276 |
-
if lng == "
|
277 |
-
return
|
278 |
-
elif lng == "
|
279 |
-
return
|
280 |
-
|
281 |
-
|
282 |
-
# elif lng== "model_4":
|
283 |
-
# return Noise_cancellation_function(audio)
|
284 |
-
|
285 |
-
|
286 |
demo=gr.Interface(
|
287 |
fn=sel_lng,
|
288 |
|
289 |
inputs=[
|
290 |
gr.Dropdown([
|
291 |
-
"
|
292 |
gr.Audio(sources=["microphone","upload"], type="filepath"),
|
293 |
],
|
294 |
outputs=[
|
|
|
29 |
# from waveletDenoise import wavelet_denoise
|
30 |
from scipy.signal import butter, lfilter, wiener
|
31 |
|
32 |
+
asr_model_malayalam = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-malayalam")
|
|
|
33 |
|
34 |
def createlex(filename):
|
35 |
#filename = "num_map.txt"
|
|
|
227 |
|
228 |
|
229 |
# Function to handle speech recognition
|
230 |
+
def recognize_speech_malayalam_model1(audio_file):
    """Transcribe an audio file with the Malayalam ASR pipeline.

    The recording is loaded at 16 kHz, passed through the high-pass filter,
    the Wiener filter and wavelet denoising, then fed to
    ``asr_model_malayalam``.  The recognised text is run through the
    ``convert_to_list`` -> ``replace_words`` -> ``text_to_int`` chain.

    Args:
        audio_file: Path of the audio file to transcribe (handed straight to
            ``librosa.load``).

    Returns:
        The recognised text, the separator ``" -----------------> "`` and the
        converted text, concatenated into one string.
    """
    audio, sr = librosa.load(audio_file, sr=16000)
    audio = high_pass_filter(audio, sr)
    audio = apply_wiener_filter(audio)
    # NOTE(review): the `from waveletDenoise import wavelet_denoise` line near
    # the top of the file is commented out -- confirm wavelet_denoise is
    # defined elsewhere in this module.
    denoised_audio = wavelet_denoise(audio)
    result = asr_model_malayalam(denoised_audio)
    text_value = result['text']
    # The original passed `cleaned_text` here, a name never assigned in this
    # function, so every call raised NameError.  Use the recognised text,
    # which is also what the function returns.
    converted_to_list = convert_to_list(text_value, text_to_list())
    replaced_words = replace_words(converted_to_list)
    # presumably text_to_int returns a str, since it is concatenated below --
    # TODO confirm
    converted_text = text_to_int(replaced_words)
    return text_value + " -----------------> " + converted_text
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
# Function to handle speech recognition
|
243 |
+
def recognize_speech_malayalam_model2(audio_file):
    """Transcribe an audio file with the Malayalam ASR pipeline (variant 2).

    The recording is loaded at 16 kHz, passed through the high-pass filter,
    the Wiener filter and wavelet denoising, then fed to
    ``asr_model_malayalam``.  ``"[UNK]"`` markers are removed from the
    recognised text before it is passed to ``convert2numkn``.

    Args:
        audio_file: Path of the audio file to transcribe (handed straight to
            ``librosa.load``).

    Returns:
        The cleaned text, the separator ``" -----------------> "`` and the
        converted text, concatenated into one string.
    """
    audio, sr = librosa.load(audio_file, sr=16000)
    audio = high_pass_filter(audio, sr)
    audio = apply_wiener_filter(audio)
    denoised_audio = wavelet_denoise(audio)
    result = asr_model_malayalam(denoised_audio)
    text_value = result['text']
    cleaned_text = text_value.replace("[UNK]", "")
    # NOTE(review): convert2numkn / kanlex look like the Kannada conversion
    # helper and lexicon carried over from the previous version -- confirm
    # they are the intended ones for Malayalam output.
    converted_text = convert2numkn(cleaned_text, kanlex)
    return cleaned_text + " -----------------> " + converted_text


# sel_lng dispatches to `recognize_speech_malayalam_model2`; the function was
# originally defined as `recognize_speech_malayalam2`, which made that branch
# raise NameError.  Keep the old name as an alias for any existing caller.
recognize_speech_malayalam2 = recognize_speech_malayalam_model2
|
253 |
|
254 |
def sel_lng(lng, mic=None, file=None):
|
|
|
259 |
else:
|
260 |
return "You must either provide a mic recording or a file"
|
261 |
|
262 |
+
if lng == "malayalam_model1":
|
263 |
+
return recognize_speech_malayalam_model1(audio)
|
264 |
+
elif lng == "malayalam_model2":
|
265 |
+
return recognize_speech_malayalam_model2(audio)
|
266 |
+
|
267 |
+
|
|
|
|
|
|
|
|
|
268 |
demo=gr.Interface(
|
269 |
fn=sel_lng,
|
270 |
|
271 |
inputs=[
|
272 |
gr.Dropdown([
|
273 |
+
"malayalam_model2","malayalam_model2"],label="Select Model"),
|
274 |
gr.Audio(sources=["microphone","upload"], type="filepath"),
|
275 |
],
|
276 |
outputs=[
|