Logeswaransr committed on
Commit
561f113
·
1 Parent(s): 9e56d60

Whisper model Initialization

Browse files
Files changed (3) hide show
  1. app.py +55 -0
  2. languages.txt +1 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline
3
+ import numpy as np
4
+ import gradio as gr
5
+
6
# Pick the execution target once: the first CUDA GPU with half precision
# when CUDA is available, otherwise the CPU with full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Checkpoint identifier handed to the transformers pipeline and shown as
# the interface description.
model_id = "openai/whisper-medium"
10
+
11
print("\n\nReading Languages...\n\n")

# languages.txt is a single comma-separated list of language names.
# Read it with an explicit encoding so the result does not depend on the
# host locale, normalise every name to stripped lower case, and drop blank
# entries (empty file, trailing comma) so that an empty string can never
# count as a valid language.
with open("languages.txt", "r", encoding="utf-8") as file:
    languages = [
        name.strip().lower()
        for name in file.read().split(",")
        if name.strip()
    ]
17
+
18
print("\n\nInitializing model...\n\n")

# Build the speech-recognition pipeline once at start-up; it is reused for
# every request handled by the interface.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    device=device,
    torch_dtype=torch_dtype,
)

print("\n\nModel Ready!!\n\nLaunching Interface...\n\n")
28
+
29
def transcribe(audio, language: str):
    """Transcribe a recorded clip in the requested language.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when no
            recording was submitted. Multi-dimensional sample arrays are
            reduced to one channel by averaging over axis 1.
        language: Name of the spoken language. Leading/trailing whitespace
            and letter case are ignored; the name must appear in the
            module-level ``languages`` list.

    Returns:
        The recognised text, or a human-readable ``"Error!! ..."`` message
        when there is no audio or the language is not supported.
    """
    # Nothing was recorded: report it instead of failing on the unpack.
    if audio is None:
        return "Error!! No audio received!!"

    sr, y = audio
    y = np.asarray(y)

    # A zero-length recording has no peak to normalise against.
    if y.size == 0:
        return "Error!! No audio received!!"

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. A completely silent clip has a peak of 0;
    # dividing by it would turn every sample into NaN, so leave it as is.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # Normalise the user input the same way the language list is normalised.
    language = language.strip().lower()
    if language not in languages:
        return "Error!! Not a valid language!!"

    args = {"task": "transcribe", "language": language}

    return transcriber({"sampling_rate": sr, "raw": y}, generate_kwargs=args)["text"]
46
+
47
# Web UI: microphone recording plus a free-text language box go in, the
# transcription text comes out.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone"),
        gr.Textbox(label="Language", placeholder="Enter the language"),
    ],
    outputs=["text"],
    title="Whisper Model Interface",
    description=model_id,
)

# Start serving the interface.
demo.launch()
languages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian, Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers[torch]
3
+ torchaudio
4
+ sentencepiece
5
+ tiktoken
6
+ accelerate
7
+ numpy