Meckyhugging committed on
Commit
5d9bcf4
·
verified ·
1 Parent(s): 69cf891

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -0
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ import torch
5
+ import whisper
6
+
7
# Whisper checkpoint loaded once at import time and shared by every request.
# "small" is the checkpoint chosen here; "tiny" is a faster alternative.
_WHISPER_MODEL_NAME = "small"
model = whisper.load_model(_WHISPER_MODEL_NAME)
9
+
10
def transcribe_audio(audio):
    """Transcribe one chunk of microphone audio with the module-level Whisper model.

    Args:
        audio: A ``(sample_rate, samples)`` tuple, as delivered by a Gradio
            ``Audio`` component configured with ``type="numpy"``, or ``None``
            when no audio has been captured yet.

    Returns:
        The recognized text, or an empty string when there is nothing to
        transcribe (``None`` input or an empty chunk).
    """
    # Local import: the file's import block does not bring numpy into scope.
    import numpy as np

    target_rate = 16000  # Whisper expects 16000 Hz input

    if audio is None:
        return ""

    sample_rate, samples = audio
    waveform = np.asarray(samples)
    if waveform.size == 0:
        return ""

    # Whisper works on float32 samples in [-1, 1]; integer PCM must be scaled.
    # NOTE(review): assumes signed integer PCM (e.g. int16) — confirm against
    # the Gradio version in use.
    if np.issubdtype(waveform.dtype, np.integer):
        full_scale = float(np.iinfo(waveform.dtype).max) + 1.0
        waveform = waveform.astype(np.float32) / full_scale
    else:
        waveform = waveform.astype(np.float32)

    # Down-mix multi-channel audio to mono.
    # NOTE(review): assumes a (samples, channels) layout — confirm.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Resample to 16 kHz when the microphone delivers a different rate.
    # Linear interpolation applies no anti-aliasing filter; it is a
    # dependency-free approximation intended for speech input.
    if sample_rate != target_rate:
        source_len = waveform.shape[0]
        target_len = int(round(source_len * target_rate / sample_rate))
        if target_len == 0:
            return ""
        source_times = np.arange(source_len) / sample_rate
        target_times = np.arange(target_len) / target_rate
        waveform = np.interp(target_times, source_times, waveform).astype(np.float32)

    # Half precision is only requested when a CUDA device is available.
    result = model.transcribe(waveform, fp16=torch.cuda.is_available())
    return result["text"]
18
+
19
# Gradio Interface: microphone audio in, recognized text out.
# NOTE(review): `source="microphone"` is the Gradio 3.x keyword; newer Gradio
# releases appear to use `sources=[...]` instead — confirm the pinned version.
_microphone_input = gr.Audio(source="microphone", type="numpy", streaming=True)
_transcript_output = gr.Textbox(label="Recognized Text")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_microphone_input,
    outputs=_transcript_output,
    live=True,  # re-run the function as new audio arrives instead of waiting for a submit
    title="Real-time Voice to Text",
    description="Speak into your microphone and get real-time transcription!",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()