# app.py
import gradio as gr
import numpy as np
import torch
import whisper

# Load a fast Whisper model ("tiny" is even faster, at some cost in accuracy)
model = whisper.load_model("small")

def transcribe_audio(audio):
    # Gradio's microphone input arrives as a tuple: (sample_rate, numpy_array)
    if audio is None:
        return ""
    sample_rate, waveform = audio
    # Whisper expects a mono float32 waveform; the microphone delivers int16 PCM
    waveform = waveform.astype(np.float32) / 32768.0
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)  # downmix stereo to mono
    # Whisper also expects a 16000 Hz sample rate (see the resampling sketch below)
    result = model.transcribe(waveform, fp16=torch.cuda.is_available())
    return result["text"]
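
# --- Sketch (not part of the original app) ---------------------------------
# Browser microphones usually capture at 44.1 or 48 kHz, while Whisper expects
# 16 kHz audio, so each chunk should be resampled before transcription. The
# helper below is an assumed, dependency-free approach using plain NumPy linear
# interpolation (the name resample_to_16k is made up for this sketch); a proper
# low-pass resampler such as torchaudio.functional.resample would give better
# fidelity. To use it, call
#     waveform = resample_to_16k(waveform, sample_rate)
# inside transcribe_audio, just before model.transcribe(...).
def resample_to_16k(waveform, sample_rate):
    """Linearly resample a mono float32 waveform to 16 kHz (rough sketch)."""
    if sample_rate == 16000:
        return waveform
    target_len = int(len(waveform) * 16000 / sample_rate)
    # Map the 16 kHz sample positions onto the original time axis and interpolate
    old_positions = np.arange(len(waveform))
    new_positions = np.linspace(0, len(waveform) - 1, target_len)
    return np.interp(new_positions, old_positions, waveform).astype(np.float32)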

# Gradio interface
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["microphone"], type="numpy", streaming=True),
    outputs=gr.Textbox(label="Recognized Text"),
    live=True,  # Important for real-time streaming
    title="Real-time Voice to Text",
    description="Speak into your microphone and get real-time transcription!",
)

if __name__ == "__main__":
    iface.launch()
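
# Note: a Space running this file would also need a requirements.txt along
# these lines (package names only; pin versions as needed):
#     gradio
#     openai-whisper
#     torch
#     numpy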