Spaces:
Sleeping
Sleeping
Delete Tamil_number_conversion.py
Browse files- Tamil_number_conversion.py +0 -65
Tamil_number_conversion.py
DELETED
@@ -1,65 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import librosa
|
3 |
-
import numpy as np
|
4 |
-
import pywt
|
5 |
-
import nbimporter
|
6 |
-
from scipy.signal import butter, lfilter, wiener
|
7 |
-
from scipy.io.wavfile import write
|
8 |
-
from transformers import pipeline
|
9 |
-
from text2int import text_to_int
|
10 |
-
from isNumber import is_number
|
11 |
-
from Text2List import text_to_list
|
12 |
-
from convert2list import convert_to_list
|
13 |
-
from processDoubles import process_doubles
|
14 |
-
from replaceWords import replace_words
|
15 |
-
|
16 |
-
# Build the speech-recognition pipeline once at import time so every request
# handled by this module reuses the same loaded model.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-tamil_new",
)
|
17 |
-
|
18 |
-
# Function to apply a high-pass filter
def high_pass_filter(audio, sr, cutoff=300, order=1):
    """Attenuate content below ``cutoff`` Hz with a Butterworth high-pass filter.

    Args:
        audio: 1-D sequence of audio samples.
        sr: Sampling rate of ``audio`` in Hz.
        cutoff: Corner frequency in Hz. Must lie strictly between 0 and the
            Nyquist frequency (``sr / 2``).
        order: Butterworth filter order. Defaults to 1, the value this
            function has always used.

    Returns:
        The filtered samples as an array with the same length as ``audio``.

    Raises:
        ValueError: If ``cutoff`` is not inside ``(0, sr / 2)``.
    """
    nyquist = 0.5 * sr
    if not 0 < cutoff < nyquist:
        # butter() rejects this too, but with a message about the normalized
        # frequency that does not mention the values the caller passed.
        raise ValueError(
            f"cutoff must be between 0 and the Nyquist frequency "
            f"({nyquist} Hz), got {cutoff}"
        )
    # butter() expects the corner frequency normalized to the Nyquist rate.
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='high', analog=False)
    # Single forward pass; output has the same shape as the input.
    return lfilter(b, a, audio)
|
25 |
-
|
26 |
-
# Function to apply wavelet denoising
def wavelet_denoise(audio, wavelet='db1', level=1, mad_scale=0.5):
    """Denoise ``audio`` by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec`` (periodization mode, default
    decomposition depth), a universal threshold is derived from the median
    absolute value of one detail band, every detail band is soft-thresholded,
    and the signal is reconstructed.

    Args:
        audio: 1-D sequence of audio samples.
        wavelet: Wavelet name passed to PyWavelets.
        level: Which detail band, counted from the finest (``1``), is used to
            estimate the noise level. It does NOT set the decomposition depth.
        mad_scale: Divisor applied to the median absolute coefficient to obtain
            the noise estimate. Defaults to 0.5, the value this function has
            always used.
            NOTE(review): the conventional constant for Gaussian noise is
            0.6745 -- confirm whether 0.5 was a deliberate tuning choice.

    Returns:
        The denoised samples, with exactly ``len(audio)`` elements.
    """
    n_samples = len(audio)
    if n_samples < 2:
        # log(n) is zero or undefined here, so there is no usable threshold;
        # hand the input back unchanged instead of failing.
        return np.asarray(audio)

    coeffs = pywt.wavedec(audio, wavelet, mode='per')
    sigma = np.median(np.abs(coeffs[-level])) / mad_scale
    uthresh = sigma * np.sqrt(2 * np.log(n_samples))
    # Leave the approximation band (index 0) alone; shrink only the details.
    coeffs[1:] = [
        pywt.threshold(band, value=uthresh, mode='soft') for band in coeffs[1:]
    ]
    denoised = pywt.waverec(coeffs, wavelet, mode='per')
    # Periodized reconstruction of an odd-length signal yields one extra
    # sample; trim so the output lines up with the input.
    return denoised[:n_samples]
|
33 |
-
|
34 |
-
# Function to apply a Wiener filter for noise reduction
def apply_wiener_filter(audio):
    """Run ``scipy.signal.wiener`` over ``audio`` and keep the result finite.

    ``wiener`` divides by the local variance of the signal. On a constant
    stretch (for example digital silence) that variance is zero and the filter
    can return NaN, so any non-finite output sample is replaced by the
    corresponding input sample.

    Args:
        audio: Sequence of audio samples.

    Returns:
        A float array with the same shape as ``audio``.
    """
    samples = np.asarray(audio, dtype=float)
    if samples.size == 0:
        # Nothing to filter.
        return samples

    # The division by zero is expected on flat input and is repaired below,
    # so keep NumPy from emitting warnings for it.
    with np.errstate(divide='ignore', invalid='ignore'):
        filtered = wiener(samples)
    return np.where(np.isfinite(filtered), filtered, samples)
|
37 |
-
|
38 |
-
# Function to handle speech recognition
def recognize_speech(audio_file):
    """Transcribe an audio file and convert the spoken numbers in it.

    The audio is loaded at 16 kHz, passed through the high-pass filter, the
    Wiener filter and wavelet denoising, then transcribed by ``asr_model``.
    The transcript is post-processed by the project helpers
    ``convert_to_list``, ``process_doubles``, ``replace_words`` and
    ``text_to_int`` (defined in other modules), in that order. Each
    intermediate result is printed for debugging.

    Args:
        audio_file: Path of the audio file to transcribe. May be ``None`` or
            empty when the user submitted the form without any audio.

    Returns:
        The value returned by ``text_to_int``, or an empty string when no
        audio file was supplied.
    """
    if not audio_file:
        # Without this guard librosa.load(None) raises and the UI shows only
        # a generic error.
        return ""

    audio, sr = librosa.load(audio_file, sr=16000)

    # Noise reduction chain; the order matches the original pipeline.
    audio = high_pass_filter(audio, sr)
    audio = apply_wiener_filter(audio)
    denoised_audio = wavelet_denoise(audio)

    result = asr_model(denoised_audio)
    text_value = result['text']
    # Drop the "<s>" marker that can appear in the raw transcript.
    cleaned_text = text_value.replace("<s>", "")
    print(cleaned_text)

    converted_to_list = convert_to_list(cleaned_text, text_to_list())
    print(converted_to_list)
    processed_doubles = process_doubles(converted_to_list)
    print(processed_doubles)
    replaced_words = replace_words(processed_doubles)
    print(replaced_words)
    converted_text = text_to_int(replaced_words)
    print(converted_text)
    return converted_text
|
57 |
-
|
58 |
-
# Gradio Interface
# Built and launched as a module-level side effect, exactly as before.
# The title and description name Tamil because the pipeline above loads the
# "cdactvm/w2v-bert-tamil_new" model; the previous text said Hindi.
gr.Interface(
    fn=recognize_speech,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Speech Recognition with Advanced Noise Reduction & Tamil ASR",
    description=(
        "Upload an audio file, and the system will use high-pass filtering, "
        "Wiener filtering, and wavelet-based denoising, then a Tamil ASR "
        "model will transcribe the clean audio."
    ),
).launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|