Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -29,10 +29,8 @@ from wienerFilter import wiener_filter
|
|
29 |
from highPassFilter import high_pass_filter
|
30 |
from waveletDenoise import wavelet_denoise
|
31 |
|
32 |
-
|
33 |
-
|
34 |
transcriber_hindi_new = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
35 |
-
transcriber_hindi_old = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
36 |
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-tamil_new")
|
37 |
vocab_dict = processor.tokenizer.get_vocab()
|
38 |
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
@@ -40,16 +38,16 @@ decoder = build_ctcdecoder(
|
|
40 |
labels=list(sorted_vocab_dict.keys()),
|
41 |
kenlm_model_path="lm.binary",
|
42 |
)
|
43 |
-
processor_with_lm = Wav2Vec2ProcessorWithLM(
|
44 |
-
feature_extractor=processor.feature_extractor,
|
45 |
-
tokenizer=processor.tokenizer,
|
46 |
-
decoder=decoder
|
47 |
-
)
|
48 |
processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
|
49 |
-
transcriber_hindi_lm = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new", tokenizer=processor_with_lm, feature_extractor=processor_with_lm.feature_extractor, decoder=processor_with_lm.decoder)
|
50 |
|
51 |
|
52 |
-
def transcribe_hindi_new(audio):
|
53 |
# # Process the audio file
|
54 |
transcript = transcriber_hindi_new(audio)
|
55 |
text_value = transcript['text']
|
@@ -58,24 +56,6 @@ def transcribe_hindi_new(audio):
|
|
58 |
converted_text=text_to_int(replaced_words)
|
59 |
return converted_text
|
60 |
|
61 |
-
def transcribe_hindi_lm(audio):
|
62 |
-
# # Process the audio file
|
63 |
-
transcript = transcriber_hindi_lm(audio)
|
64 |
-
text_value = transcript['text']
|
65 |
-
processd_doubles=process_doubles(text_value)
|
66 |
-
replaced_words = replace_words(processd_doubles)
|
67 |
-
converted_text=text_to_int(replaced_words)
|
68 |
-
return converted_text
|
69 |
-
|
70 |
-
def transcribe_hindi_old(audio):
|
71 |
-
# # Process the audio file
|
72 |
-
transcript = transcriber_hindi_old(audio)
|
73 |
-
text_value = transcript['text']
|
74 |
-
cleaned_text=text_value.replace("<s>","")
|
75 |
-
processd_doubles=process_doubles(cleaned_text)
|
76 |
-
replaced_words = replace_words(processd_doubles)
|
77 |
-
converted_text=text_to_int(replaced_words)
|
78 |
-
return converted_text
|
79 |
|
80 |
###############################################
|
81 |
# implementation of noise reduction techniques.
|
@@ -131,18 +111,6 @@ def sel_lng(lng, mic=None, file=None):
|
|
131 |
return Noise_cancellation_function(audio)
|
132 |
|
133 |
|
134 |
-
# demo=gr.Interface(
|
135 |
-
# transcribe,
|
136 |
-
# inputs=[
|
137 |
-
# gr.Audio(sources=["microphone","upload"], type="filepath"),
|
138 |
-
# ],
|
139 |
-
# outputs=[
|
140 |
-
# "textbox"
|
141 |
-
# ],
|
142 |
-
# title="Automatic Speech Recognition",
|
143 |
-
# description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
|
144 |
-
# ).launch()
|
145 |
-
|
146 |
demo=gr.Interface(
|
147 |
fn=sel_lng,
|
148 |
|
|
|
29 |
from highPassFilter import high_pass_filter
|
30 |
from waveletDenoise import wavelet_denoise
|
31 |
|
|
|
|
|
32 |
transcriber_hindi_new = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
33 |
+
# transcriber_hindi_old = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
34 |
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-tamil_new")
|
35 |
vocab_dict = processor.tokenizer.get_vocab()
|
36 |
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
|
|
38 |
labels=list(sorted_vocab_dict.keys()),
|
39 |
kenlm_model_path="lm.binary",
|
40 |
)
|
41 |
+
# processor_with_lm = Wav2Vec2ProcessorWithLM(
|
42 |
+
# feature_extractor=processor.feature_extractor,
|
43 |
+
# tokenizer=processor.tokenizer,
|
44 |
+
# decoder=decoder
|
45 |
+
# )
|
46 |
processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
|
47 |
+
# transcriber_hindi_lm = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new", tokenizer=processor_with_lm, feature_extractor=processor_with_lm.feature_extractor, decoder=processor_with_lm.decoder)
|
48 |
|
49 |
|
50 |
+
def transcribe_tamil_new(audio):
|
51 |
# # Process the audio file
|
52 |
transcript = transcriber_hindi_new(audio)
|
53 |
text_value = transcript['text']
|
|
|
56 |
converted_text=text_to_int(replaced_words)
|
57 |
return converted_text
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
###############################################
|
61 |
# implementation of noise reduction techniques.
|
|
|
111 |
return Noise_cancellation_function(audio)
|
112 |
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
demo=gr.Interface(
|
115 |
fn=sel_lng,
|
116 |
|