CCIgLASGOW_STT / app.py
Meckyhugging's picture
Update app.py
3dfe891 verified
raw
history blame contribute delete
874 Bytes
# app.py
import gradio as gr
import numpy as np
import torch
import whisper
# Load the Whisper model once at import time so every request reuses it.
# "small" trades accuracy for speed; "tiny" is faster still.
model = whisper.load_model("small") # You can use "tiny" if you want even faster
def transcribe_audio(audio):
    """Transcribe one Gradio microphone chunk to text with Whisper.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        Gradio's numpy audio format is (sample_rate, waveform). With
        ``streaming=True`` the callback can fire before any audio
        exists, in which case ``audio`` is None.

    Returns
    -------
    str
        The recognized text, or "" when no audio has arrived yet.
    """
    if audio is None:  # streaming mode may call us before the mic produces data
        return ""
    sample_rate, waveform = audio
    # Gradio delivers int16 PCM; Whisper requires float32 in [-1, 1].
    waveform = waveform.astype(np.float32) / 32768.0
    # Collapse stereo (frames, channels) to mono by averaging channels.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)
    # Whisper expects a 16 kHz signal; linearly resample if the mic differs.
    if sample_rate != 16000 and waveform.size:
        target_len = int(round(waveform.shape[0] * 16000 / sample_rate))
        waveform = np.interp(
            np.linspace(0.0, waveform.shape[0] - 1, target_len),
            np.arange(waveform.shape[0]),
            waveform,
        ).astype(np.float32)
    result = model.transcribe(waveform, fp16=torch.cuda.is_available())
    return result["text"]
# Build the Gradio UI: a live microphone stream wired to the transcriber.
mic_input = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
text_output = gr.Textbox(label="Recognized Text")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=mic_input,
    outputs=text_output,
    title="Real-time Voice to Text",
    description="Speak into your microphone and get real-time transcription!",
    live=True,  # re-run the function as new audio chunks stream in
)
# Start the Gradio server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()