Voice-To-Text / app.py
arshadrana's picture
Update app.py
a562e5f verified
raw
history blame
1.5 kB
import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment
from io import BytesIO
import tempfile
def _build_audio_segment(sample_rate, payload):
    """Convert the data half of an audio tuple into a pydub ``AudioSegment``.

    Args:
        sample_rate: Sampling rate in Hz; only used for raw PCM payloads.
        payload: Either encoded audio bytes (decoded as MP3, as this app
            always did) or an array-like of signed-integer PCM samples
            exposing ``dtype``, ``ndim``, ``shape`` and ``tobytes()`` -- the
            form Gradio's default ``type="numpy"`` audio component delivers.

    Returns:
        The decoded ``AudioSegment``.

    Raises:
        TypeError: If the payload is neither bytes-like nor a signed-integer
            sample array.
    """
    if isinstance(payload, (bytes, bytearray, memoryview)):
        return AudioSegment.from_file(BytesIO(payload), format="mp3")

    dtype = getattr(payload, "dtype", None)
    if dtype is None or not hasattr(payload, "tobytes"):
        raise TypeError(
            f"unsupported audio payload type: {type(payload).__name__}"
        )
    if dtype.kind != "i":
        # Float/unsigned samples would be misread as signed PCM and produce
        # noise, so refuse them instead of transcribing garbage.
        raise TypeError(f"unsupported sample dtype: {dtype}")

    # A 2-D array is (frames, channels); its C-order bytes are already the
    # interleaved layout that raw PCM expects.
    channels = payload.shape[1] if payload.ndim > 1 else 1
    return AudioSegment(
        data=payload.tobytes(),
        sample_width=dtype.itemsize,
        frame_rate=int(sample_rate),
        channels=channels,
    )


def transcribe_audio(audio_input):
    """Transcribe an audio clip to text with Google Speech Recognition.

    Args:
        audio_input: A 2-tuple whose second element holds the audio, either
            as raw PCM samples (Gradio's ``(sample_rate, samples)`` tuple) or
            as MP3-encoded bytes.

    Returns:
        The recognised text, or a human-readable error message when the audio
        cannot be loaded, cannot be understood, or the service is unreachable.

    Raises:
        ValueError: If ``audio_input`` is not a 2-tuple.
    """
    # Validate before touching any audio/recognition machinery.
    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
    sample_rate, payload = audio_input

    # Broad catch is deliberate: this is the UI boundary and every decoding
    # failure is reported to the user as text rather than a stack trace.
    try:
        audio_segment = _build_audio_segment(sample_rate, payload)
    except Exception as e:
        return f"Error loading audio file: {e}"

    # Convert to WAV in memory. The previous NamedTemporaryFile(delete=False)
    # was never removed, leaking one file per request, and was re-opened by
    # name while still open, which fails on Windows.
    wav_buffer = BytesIO()
    audio_segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    # Transcribe the audio
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(wav_buffer) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
# Gradio Interface: one audio input wired to transcribe_audio, with the
# returned string shown as text. The settings are gathered first so the
# constructor call stays a single line.
_INTERFACE_SETTINGS = {
    "fn": transcribe_audio,
    "inputs": "audio",
    "outputs": "text",
    "title": "Voice to Text Converter",
    "description": "Upload an audio file and get the transcribed text.",
}
iface = gr.Interface(**_INTERFACE_SETTINGS)

# Start the web server as soon as the script runs.
iface.launch()