cdactvm committed on
Commit
63a812a
·
verified ·
1 Parent(s): 7da92b3

Delete Tamil_number_conversion.ipynb

Browse files
Files changed (1) hide show
  1. Tamil_number_conversion.ipynb +0 -223
Tamil_number_conversion.ipynb DELETED
@@ -1,223 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "dc09394e-2130-4bd4-af30-01346d8ee355",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stderr",
11
- "output_type": "stream",
12
- "text": [
13
- "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
14
- ]
15
- },
16
- {
17
- "name": "stdout",
18
- "output_type": "stream",
19
- "text": [
20
- "Running on local URL: http://127.0.0.1:7860\n",
21
- "\n",
22
- "To create a public link, set `share=True` in `launch()`.\n"
23
- ]
24
- },
25
- {
26
- "data": {
27
- "text/html": [
28
- "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
29
- ],
30
- "text/plain": [
31
- "<IPython.core.display.HTML object>"
32
- ]
33
- },
34
- "metadata": {},
35
- "output_type": "display_data"
36
- },
37
- {
38
- "data": {
39
- "text/plain": []
40
- },
41
- "execution_count": 1,
42
- "metadata": {},
43
- "output_type": "execute_result"
44
- },
45
- {
46
- "name": "stderr",
47
- "output_type": "stream",
48
- "text": [
49
- "C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\gradio\\analytics.py:106: UserWarning: IMPORTANT: You are using gradio version 4.37.2, however version 4.44.1 is available, please upgrade. \n",
50
- "--------\n",
51
- " warnings.warn(\n",
52
- "ERROR: Exception in ASGI application\n",
53
- "Traceback (most recent call last):\n",
54
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\uvicorn\\protocols\\http\\h11_impl.py\", line 404, in run_asgi\n",
55
- " result = await app( # type: ignore[func-returns-value]\n",
56
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
57
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\uvicorn\\middleware\\proxy_headers.py\", line 84, in __call__\n",
58
- " return await self.app(scope, receive, send)\n",
59
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
60
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\fastapi\\applications.py\", line 1054, in __call__\n",
61
- " await super().__call__(scope, receive, send)\n",
62
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\applications.py\", line 123, in __call__\n",
63
- " await self.middleware_stack(scope, receive, send)\n",
64
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\middleware\\errors.py\", line 186, in __call__\n",
65
- " raise exc\n",
66
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\middleware\\errors.py\", line 164, in __call__\n",
67
- " await self.app(scope, receive, _send)\n",
68
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\gradio\\route_utils.py\", line 714, in __call__\n",
69
- " await self.app(scope, receive, send)\n",
70
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\middleware\\exceptions.py\", line 62, in __call__\n",
71
- " await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)\n",
72
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\_exception_handler.py\", line 64, in wrapped_app\n",
73
- " raise exc\n",
74
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\_exception_handler.py\", line 53, in wrapped_app\n",
75
- " await app(scope, receive, sender)\n",
76
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\routing.py\", line 762, in __call__\n",
77
- " await self.middleware_stack(scope, receive, send)\n",
78
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\routing.py\", line 782, in app\n",
79
- " await route.handle(scope, receive, send)\n",
80
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\routing.py\", line 297, in handle\n",
81
- " await self.app(scope, receive, send)\n",
82
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\routing.py\", line 77, in app\n",
83
- " await wrap_app_handling_exceptions(app, request)(scope, receive, send)\n",
84
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\_exception_handler.py\", line 64, in wrapped_app\n",
85
- " raise exc\n",
86
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\_exception_handler.py\", line 53, in wrapped_app\n",
87
- " await app(scope, receive, sender)\n",
88
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\routing.py\", line 75, in app\n",
89
- " await response(scope, receive, send)\n",
90
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\responses.py\", line 346, in __call__\n",
91
- " await send(\n",
92
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\_exception_handler.py\", line 50, in sender\n",
93
- " await send(message)\n",
94
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\_exception_handler.py\", line 50, in sender\n",
95
- " await send(message)\n",
96
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\starlette\\middleware\\errors.py\", line 161, in _send\n",
97
- " await send(message)\n",
98
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\uvicorn\\protocols\\http\\h11_impl.py\", line 508, in send\n",
99
- " output = self.conn.send(event=h11.EndOfMessage())\n",
100
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
101
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\h11\\_connection.py\", line 512, in send\n",
102
- " data_list = self.send_with_data_passthrough(event)\n",
103
- " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
104
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\h11\\_connection.py\", line 545, in send_with_data_passthrough\n",
105
- " writer(event, data_list.append)\n",
106
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\h11\\_writers.py\", line 67, in __call__\n",
107
- " self.send_eom(event.headers, write)\n",
108
- " File \"C:\\Users\\WCHL\\anaconda3\\envs\\RunInference2\\Lib\\site-packages\\h11\\_writers.py\", line 96, in send_eom\n",
109
- " raise LocalProtocolError(\"Too little data for declared Content-Length\")\n",
110
- "h11._util.LocalProtocolError: Too little data for declared Content-Length\n"
111
- ]
112
- },
113
- {
114
- "name": "stdout",
115
- "output_type": "stream",
116
- "text": [
117
- "எண்பது\n",
118
- "எண்பது\n",
119
- "எண்பது\n",
120
- "eighty\n",
121
- "80\n"
122
- ]
123
- }
124
- ],
125
- "source": [
126
- "import gradio as gr\n",
127
- "import librosa\n",
128
- "import numpy as np\n",
129
- "import pywt\n",
130
- "import nbimporter\n",
131
- "from scipy.signal import butter, lfilter, wiener\n",
132
- "from scipy.io.wavfile import write\n",
133
- "from transformers import pipeline\n",
134
- "from text2int import text_to_int\n",
135
- "from isNumber import is_number\n",
136
- "from Text2List import text_to_list\n",
137
- "from convert2list import convert_to_list\n",
138
- "from processDoubles import process_doubles\n",
139
- "from replaceWords import replace_words\n",
140
- "\n",
141
- "asr_model = pipeline(\"automatic-speech-recognition\", model=\"cdactvm/w2v-bert-tamil_new\")\n",
142
- "\n",
143
- "# Function to apply a high-pass filter\n",
144
- "def high_pass_filter(audio, sr, cutoff=300):\n",
145
- " nyquist = 0.5 * sr\n",
146
- " normal_cutoff = cutoff / nyquist\n",
147
- " b, a = butter(1, normal_cutoff, btype='high', analog=False)\n",
148
- " filtered_audio = lfilter(b, a, audio)\n",
149
- " return filtered_audio\n",
150
- "\n",
151
- "# Function to apply wavelet denoising\n",
152
- "def wavelet_denoise(audio, wavelet='db1', level=1):\n",
153
- " coeffs = pywt.wavedec(audio, wavelet, mode='per')\n",
154
- " sigma = np.median(np.abs(coeffs[-level])) / 0.5\n",
155
- " uthresh = sigma * np.sqrt(2 * np.log(len(audio)))\n",
156
- " coeffs[1:] = [pywt.threshold(i, value=uthresh, mode='soft') for i in coeffs[1:]]\n",
157
- " return pywt.waverec(coeffs, wavelet, mode='per')\n",
158
- "\n",
159
- "# Function to apply a Wiener filter for noise reduction\n",
160
- "def apply_wiener_filter(audio):\n",
161
- " return wiener(audio)\n",
162
- "\n",
163
- "# Function to handle speech recognition\n",
164
- "def recognize_speech(audio_file):\n",
165
- " audio, sr = librosa.load(audio_file, sr=16000)\n",
166
- " audio = high_pass_filter(audio, sr)\n",
167
- " audio = apply_wiener_filter(audio)\n",
168
- " denoised_audio = wavelet_denoise(audio)\n",
169
- " result = asr_model(denoised_audio)\n",
170
- " text_value = result['text']\n",
171
- " cleaned_text = text_value.replace(\"<s>\", \"\")\n",
172
- " print(cleaned_text)\n",
173
- " converted_to_list = convert_to_list(cleaned_text, text_to_list())\n",
174
- " print(converted_to_list)\n",
175
- " processed_doubles = process_doubles(converted_to_list)\n",
176
- " print(processed_doubles)\n",
177
- " replaced_words = replace_words(processed_doubles)\n",
178
- " print(replaced_words)\n",
179
- " converted_text = text_to_int(replaced_words)\n",
180
- " print(converted_text)\n",
181
- " return converted_text\n",
182
- "\n",
183
- "# Gradio Interface\n",
184
- "gr.Interface(\n",
185
- " fn=recognize_speech,\n",
186
- " inputs=gr.Audio(sources=[\"microphone\",\"upload\"], type=\"filepath\"),\n",
187
- " outputs=\"text\",\n",
188
- "    title=\"Speech Recognition with Advanced Noise Reduction & Tamil ASR\",\n",
189
- "    description=\"Upload an audio file, and the system will use high-pass filtering, Wiener filtering, and wavelet-based denoising, then a Tamil ASR model will transcribe the clean audio.\"\n",
190
- ").launch()\n"
191
- ]
192
- },
193
- {
194
- "cell_type": "code",
195
- "execution_count": null,
196
- "id": "d4565cfb-a8e0-49a1-8878-6e5b1cd105e6",
197
- "metadata": {},
198
- "outputs": [],
199
- "source": []
200
- }
201
- ],
202
- "metadata": {
203
- "kernelspec": {
204
- "display_name": "Python 3 (ipykernel)",
205
- "language": "python",
206
- "name": "python3"
207
- },
208
- "language_info": {
209
- "codemirror_mode": {
210
- "name": "ipython",
211
- "version": 3
212
- },
213
- "file_extension": ".py",
214
- "mimetype": "text/x-python",
215
- "name": "python",
216
- "nbconvert_exporter": "python",
217
- "pygments_lexer": "ipython3",
218
- "version": "3.11.7"
219
- }
220
- },
221
- "nbformat": 4,
222
- "nbformat_minor": 5
223
- }