Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ from wienerFilter import wiener_filter
|
|
| 29 |
from highPassFilter import high_pass_filter
|
| 30 |
from waveletDenoise import wavelet_denoise
|
| 31 |
|
| 32 |
-
|
| 33 |
# transcriber_hindi_old = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
| 34 |
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-tamil_new")
|
| 35 |
vocab_dict = processor.tokenizer.get_vocab()
|
|
@@ -49,7 +49,7 @@ processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
|
|
| 49 |
|
| 50 |
def transcribe_tamil_new(audio):
|
| 51 |
# # Process the audio file
|
| 52 |
-
transcript =
|
| 53 |
text_value = transcript['text']
|
| 54 |
processd_doubles=process_doubles(text_value)
|
| 55 |
replaced_words = replace_words(processd_doubles)
|
|
@@ -83,7 +83,7 @@ def Noise_cancellation_function(audio_file):
|
|
| 83 |
write(temp_wav, sr, denoised_audio)
|
| 84 |
|
| 85 |
# Perform speech recognition on the denoised audio
|
| 86 |
-
transcript =
|
| 87 |
text_value = transcript['text']
|
| 88 |
cleaned_text=text_value.replace("<s>","")
|
| 89 |
processd_doubles=process_doubles(cleaned_text)
|
|
@@ -102,13 +102,13 @@ def sel_lng(lng, mic=None, file=None):
|
|
| 102 |
return "You must either provide a mic recording or a file"
|
| 103 |
|
| 104 |
if lng == "model_1":
|
| 105 |
-
return
|
| 106 |
-
elif lng == "model_2":
|
| 107 |
-
|
| 108 |
-
elif lng== "model_3":
|
| 109 |
-
|
| 110 |
-
elif lng== "model_4":
|
| 111 |
-
|
| 112 |
|
| 113 |
|
| 114 |
demo=gr.Interface(
|
|
|
|
| 29 |
from highPassFilter import high_pass_filter
|
| 30 |
from waveletDenoise import wavelet_denoise
|
| 31 |
|
| 32 |
+
transcriber_taml_new = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
| 33 |
# transcriber_hindi_old = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
| 34 |
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-tamil_new")
|
| 35 |
vocab_dict = processor.tokenizer.get_vocab()
|
|
|
|
| 49 |
|
| 50 |
def transcribe_tamil_new(audio):
|
| 51 |
# # Process the audio file
|
| 52 |
+
transcript = transcriber_taml_new(audio)
|
| 53 |
text_value = transcript['text']
|
| 54 |
processd_doubles=process_doubles(text_value)
|
| 55 |
replaced_words = replace_words(processd_doubles)
|
|
|
|
| 83 |
write(temp_wav, sr, denoised_audio)
|
| 84 |
|
| 85 |
# Perform speech recognition on the denoised audio
|
| 86 |
+
transcript = transcriber_taml_new(temp_wav)
|
| 87 |
text_value = transcript['text']
|
| 88 |
cleaned_text=text_value.replace("<s>","")
|
| 89 |
processd_doubles=process_doubles(cleaned_text)
|
|
|
|
| 102 |
return "You must either provide a mic recording or a file"
|
| 103 |
|
| 104 |
if lng == "model_1":
|
| 105 |
+
return transcriber_taml_new(audio)
|
| 106 |
+
# elif lng == "model_2":
|
| 107 |
+
# return transcribe_hindi_new(audio)
|
| 108 |
+
# elif lng== "model_3":
|
| 109 |
+
# return transcribe_hindi_lm(audio)
|
| 110 |
+
# elif lng== "model_4":
|
| 111 |
+
# return Noise_cancellation_function(audio)
|
| 112 |
|
| 113 |
|
| 114 |
demo=gr.Interface(
|