Spaces:

cdactvm
/

Malayalam_ASR_Demo

Running

App Files Community

cdactvm committed on Jan 24

Commit

7ae75cf

verified ·

1 Parent(s): 52bb13c

Update app.py

Browse files

Files changed (1) hide show

app.py +159 -159

app.py CHANGED Viewed

@@ -31,179 +31,179 @@ from scipy.signal import butter, lfilter, wiener
 asr_model_malayalam = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-malayalam")
-def createlex(filename):
-#filename = "num_map.txt"
-# Initialize an empty dictionary
-    data_dict = {}
-# Open the file and read it line by line
-    with open(filename, "r", encoding="utf-8") as f:
-        for line in f:
-        # Strip newline characters and split by tab
-            key, value = line.strip().split("\t")
-        # Add to dictionary
-            data_dict[key] = value
-    return data_dict
-tellex=createlex("num_words_tel.txt")
-kanlex=createlex("num_words_kn.txt")
-def addnum(inlist):
-    sum=0
-    for num in inlist:
-        sum+=int(num)
-    return sum
-from rapidfuzz import process
-def get_val(word, lexicon):
-    threshold = 80  # Minimum similarity score
-    length_difference = 4
-    #length_range = (4, 6)  # Acceptable character length range (min, max)
-    # Find the best match above the similarity threshold
-    result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
-    print (result)
-    if result:
-        match, score, _ = result
-        #print(lexicon[match])
-        #return lexicon[match]
-        if abs(len(match) - len(word)) <= length_difference:
-        #if length_range[0] <= len(match) <= length_range[1]:
-            return lexicon[match]
-        else:
-            return None
-    else:
-        return None
-def convert2numtel(input, lex):
-    input += " #"  # Add a period for termination
-    words = input.split()
-    i = 0
-    num = 0
-    outstr = ""
-    digit_end = True
-    numlist = []
-    addflag = False
-    prevword=""
-    single_list=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,15,17,18,19]
-    # Process the words
-    while i < len(words):
-        #checkwordlist = handleSpecialnum(words[i])
-        # Handle special numbers
-        #if len(checkwordlist) == 2:
-        #    words[i] = checkwordlist[0]
-        #    words.insert(i + 1, checkwordlist[1])  # Collect new word for later processing
-        # Get numerical value of the word
-        numval = get_val(words[i], lex)
-        if numval is not None:
-            if prevword not in single_list:
-                addflag = True
-                numlist.append(numval)
-            else:
-                if addflag:
-                    numlist.append(numval)
-                    num = addnum(numlist)
-                    outstr += str(num) + " "
-                    addflag = False
-                    numlist = []
-                else:
-                    outstr += " " + str(numval) + " "
-            digit_end = False
-            prevword=numval
-        else:
-            prevword=""
-            if addflag:
-                num = addnum(numlist)
-                outstr += str(num) + " " + words[i] + " "
-                addflag = False
-                numlist = []
-            else:
-                outstr += words[i] + " "
-            if not digit_end:
-                digit_end = True
-        # Move to the next word
-        i += 1
-    # Final processing
-    outstr = outstr.replace('#','')  # Remove trailing spaces
-    return outstr
-def convert2numkn(input, lex):
-    input += " ######"  # Add a period for termination
-    words = input.split()
-    i = 0
-    num = 0
-    outstr = ""
-    digit_end = True
-    numlist = []
-    addflag = False
-    prevword = []
-    # Process the words
-    while i < len(words):
-        # Get numerical value of the word
-        numval = get_val(words[i], lex)
-        if len(prevword)>=3:
-            prevword.pop(0)
-            prevword.append(words[i])
-        else:
-            prevword.append(words[i])
-        if numval is not None:
-            addflag = True
-            numlist.append(numval)
-        else:
-            #print("word--->"+words[i])
-            #print("addflagword--->"+str(addflag))
-            prevwords=" ".join(prevword)
-            #print("prev word--->"+prevwords)
-            numval=get_val(prevwords,lex)
-            if numval is not None:
-                #addflag=True
-                #print("numval " +numval)
-                numlist=[]
-                #print("First outstr--->"+outstr)
-                outwords = outstr.split()
-                outstr=" ".join(outwords[:-1])
-                #print("outstr--->"+outstr)
-                outstr += " " + str(numval) + " "
-                #print(" aoutstr--->"+outstr)
-                numval=0
-                addflag=False
-            else:
-                if addflag:
-                    num = addnum(numlist)
-                    outstr += str(num) + " " + words[i] + " "
-                    #print("penlast outstr--->"+outstr)
-                    addflag = False
-                    numlist = []
-                else:
-                    outstr += words[i] + " "
-                    #print("last outstr--->"+outstr)
-                if not digit_end:
-                    digit_end = True
-        # Move to the next word
-        i += 1
-    # Final processing
-    outstr = outstr.replace('#','')  # Remove trailing spaces
-    return outstr
 # Function to apply a high-pass filter
 def high_pass_filter(audio, sr, cutoff=300):
@@ -240,16 +240,16 @@ def recognize_speech_malayalam_model1(audio_file):
     return text_value +" -----------------> " + final_text
 ## Function to handle speech recognition
-def recognize_speech_malayalam2(audio_file):
-    audio, sr = librosa.load(audio_file, sr=16000)
-    audio = high_pass_filter(audio, sr)
-    audio = apply_wiener_filter(audio)
-    denoised_audio = wavelet_denoise(audio)
-    result = asr_model_malayalam(denoised_audio)
-    text_value = result['text']
-    cleaned_text = text_value.replace("[UNK]", "")
-    converted_text=convert2numkn(cleaned_text,kanlex)
-    return cleaned_text +" -----------------> " + converted_text
 def sel_lng(lng, mic=None, file=None):
     if mic is not None:
@@ -261,8 +261,8 @@ def sel_lng(lng, mic=None, file=None):
     if lng == "malayalam_model1":
         return recognize_speech_malayalam_model1(audio)
-    elif lng == "malayalam_model2":
-        return recognize_speech_malayalam_model2(audio)
 demo=gr.Interface(
@@ -270,7 +270,7 @@ demo=gr.Interface(
     inputs=[
         gr.Dropdown([
-            "malayalam_model1",'malayalam_model2'],label="Select Model"),
         gr.Audio(sources=["microphone","upload"], type="filepath"),
     ],
     outputs=[

 asr_model_malayalam = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-malayalam")
+# def createlex(filename):
+# #filename = "num_map.txt"
+# # Initialize an empty dictionary
+#     data_dict = {}
+# # Open the file and read it line by line
+#     with open(filename, "r", encoding="utf-8") as f:
+#         for line in f:
+#         # Strip newline characters and split by tab
+#             key, value = line.strip().split("\t")
+#         # Add to dictionary
+#             data_dict[key] = value
+#     return data_dict
+# tellex=createlex("num_words_tel.txt")
+# kanlex=createlex("num_words_kn.txt")
+# def addnum(inlist):
+#     sum=0
+#     for num in inlist:
+#         sum+=int(num)
+#     return sum
+# from rapidfuzz import process
+# def get_val(word, lexicon):
+#     threshold = 80  # Minimum similarity score
+#     length_difference = 4
+#     #length_range = (4, 6)  # Acceptable character length range (min, max)
+#     # Find the best match above the similarity threshold
+#     result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
+#     print (result)
+#     if result:
+#         match, score, _ = result
+#         #print(lexicon[match])
+#         #return lexicon[match]
+#         if abs(len(match) - len(word)) <= length_difference:
+#         #if length_range[0] <= len(match) <= length_range[1]:
+#             return lexicon[match]
+#         else:
+#             return None
+#     else:
+#         return None
+# def convert2numtel(input, lex):
+#     input += " #"  # Add a period for termination
+#     words = input.split()
+#     i = 0
+#     num = 0
+#     outstr = ""
+#     digit_end = True
+#     numlist = []
+#     addflag = False
+#     prevword=""
+#     single_list=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,15,17,18,19]
+#     # Process the words
+#     while i < len(words):
+#         #checkwordlist = handleSpecialnum(words[i])
+#         # Handle special numbers
+#         #if len(checkwordlist) == 2:
+#         #    words[i] = checkwordlist[0]
+#         #    words.insert(i + 1, checkwordlist[1])  # Collect new word for later processing
+#         # Get numerical value of the word
+#         numval = get_val(words[i], lex)
+#         if numval is not None:
+#             if prevword not in single_list:
+#                 addflag = True
+#                 numlist.append(numval)
+#             else:
+#                 if addflag:
+#                     numlist.append(numval)
+#                     num = addnum(numlist)
+#                     outstr += str(num) + " "
+#                     addflag = False
+#                     numlist = []
+#                 else:
+#                     outstr += " " + str(numval) + " "
+#             digit_end = False
+#             prevword=numval
+#         else:
+#             prevword=""
+#             if addflag:
+#                 num = addnum(numlist)
+#                 outstr += str(num) + " " + words[i] + " "
+#                 addflag = False
+#                 numlist = []
+#             else:
+#                 outstr += words[i] + " "
+#             if not digit_end:
+#                 digit_end = True
+#         # Move to the next word
+#         i += 1
+#     # Final processing
+#     outstr = outstr.replace('#','')  # Remove trailing spaces
+#     return outstr
+# def convert2numkn(input, lex):
+#     input += " ######"  # Add a period for termination
+#     words = input.split()
+#     i = 0
+#     num = 0
+#     outstr = ""
+#     digit_end = True
+#     numlist = []
+#     addflag = False
+#     prevword = []
+#     # Process the words
+#     while i < len(words):
+#         # Get numerical value of the word
+#         numval = get_val(words[i], lex)
+#         if len(prevword)>=3:
+#             prevword.pop(0)
+#             prevword.append(words[i])
+#         else:
+#             prevword.append(words[i])
+#         if numval is not None:
+#             addflag = True
+#             numlist.append(numval)
+#         else:
+#             #print("word--->"+words[i])
+#             #print("addflagword--->"+str(addflag))
+#             prevwords=" ".join(prevword)
+#             #print("prev word--->"+prevwords)
+#             numval=get_val(prevwords,lex)
+#             if numval is not None:
+#                 #addflag=True
+#                 #print("numval " +numval)
+#                 numlist=[]
+#                 #print("First outstr--->"+outstr)
+#                 outwords = outstr.split()
+#                 outstr=" ".join(outwords[:-1])
+#                 #print("outstr--->"+outstr)
+#                 outstr += " " + str(numval) + " "
+#                 #print(" aoutstr--->"+outstr)
+#                 numval=0
+#                 addflag=False
+#             else:
+#                 if addflag:
+#                     num = addnum(numlist)
+#                     outstr += str(num) + " " + words[i] + " "
+#                     #print("penlast outstr--->"+outstr)
+#                     addflag = False
+#                     numlist = []
+#                 else:
+#                     outstr += words[i] + " "
+#                     #print("last outstr--->"+outstr)
+#                 if not digit_end:
+#                     digit_end = True
+#         # Move to the next word
+#         i += 1
+#     # Final processing
+#     outstr = outstr.replace('#','')  # Remove trailing spaces
+#     return outstr
 # Function to apply a high-pass filter
 def high_pass_filter(audio, sr, cutoff=300):
     return text_value +" -----------------> " + final_text
 ## Function to handle speech recognition
+# def recognize_speech_malayalam2(audio_file):
+#     audio, sr = librosa.load(audio_file, sr=16000)
+#     audio = high_pass_filter(audio, sr)
+#     audio = apply_wiener_filter(audio)
+#     denoised_audio = wavelet_denoise(audio)
+#     result = asr_model_malayalam(denoised_audio)
+#     text_value = result['text']
+#     cleaned_text = text_value.replace("[UNK]", "")
+#     converted_text=convert2numkn(cleaned_text,kanlex)
+#     return cleaned_text +" -----------------> " + converted_text
 def sel_lng(lng, mic=None, file=None):
     if mic is not None:
     if lng == "malayalam_model1":
         return recognize_speech_malayalam_model1(audio)
+    # elif lng == "malayalam_model2":
+    #     return recognize_speech_malayalam_model2(audio)
 demo=gr.Interface(
     inputs=[
         gr.Dropdown([
+            "malayalam_model1"],label="Select Model"),
         gr.Audio(sources=["microphone","upload"], type="filepath"),
     ],
     outputs=[