# voice-to-text / app.py
# arshadrana's picture
# Update app.py
# da8d82c verified
# raw
# history blame
# 1.58 kB
import gradio as gr
import speech_recognition as sr
from io import BytesIO
from pydub import AudioSegment
def _segment_from_samples(sample_rate, samples):
    """Wrap already-decoded PCM samples in a pydub ``AudioSegment``.

    Args:
        sample_rate: Frames per second of the recording.
        samples: NumPy-style array (duck-typed: ``dtype``, ``ndim``,
            ``shape``, ``clip``, ``astype``, ``tobytes``) shaped ``(frames,)``
            for mono or ``(frames, channels)`` for multi-channel audio.

    Returns:
        An ``AudioSegment`` built from the raw sample bytes.

    Raises:
        ValueError: If ``samples`` is not array-like, has more than two
            dimensions, or uses an unsupported sample dtype.
    """
    dtype = getattr(samples, "dtype", None)
    if dtype is None:
        raise ValueError(
            "Expected audio data to be encoded bytes or an array of samples."
        )

    if samples.ndim == 1:
        channels = 1
    elif samples.ndim == 2:
        # Row-major (frames, channels) serialises to interleaved frames,
        # which is the layout raw PCM expects.
        channels = int(samples.shape[1])
    else:
        raise ValueError(
            f"Expected 1-D or 2-D audio samples, got {samples.ndim} dimensions."
        )

    if dtype.kind == "f":
        # NOTE(review): assumes float samples are normalised to [-1.0, 1.0];
        # they are rescaled to signed 16-bit PCM.
        pcm = (samples.clip(-1.0, 1.0) * 32767).astype("<i2")
    elif dtype.kind == "i" and dtype.itemsize in (1, 2, 4):
        # Keep the original width, but force little-endian byte order.
        pcm = samples.astype(f"<i{dtype.itemsize}")
    else:
        raise ValueError(f"Unsupported audio sample dtype: {dtype}")

    return AudioSegment(
        data=pcm.tobytes(),
        sample_width=pcm.dtype.itemsize,
        frame_rate=int(sample_rate),
        channels=channels,
    )


def transcribe_audio(audio_input):
    """Transcribe a recording to text with Google Speech Recognition.

    Args:
        audio_input: A 2-tuple ``(sample_rate, data)``. ``data`` may be an
            array of decoded PCM samples (what Gradio's audio input delivers
            by default) or a bytes-like object holding an encoded audio file.

    Returns:
        The recognised text, or a human-readable message when the audio
        could not be understood or the recognition service could not be
        reached.

    Raises:
        ValueError: If ``audio_input`` is not a 2-tuple, or its data is in an
            unsupported form.
    """
    # Validate first so bad input fails fast, before any decoding work.
    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
    sample_rate, payload = audio_input

    if isinstance(payload, (bytes, bytearray, memoryview)):
        # Encoded file contents: let pydub detect and decode the container.
        audio_segment = AudioSegment.from_file(BytesIO(payload))
    else:
        # Decoded samples carry no header, so the sample rate from the tuple
        # is required to interpret them; build the segment directly.
        audio_segment = _segment_from_samples(sample_rate, payload)

    # Re-encode as WAV in memory, the format sr.AudioFile reads.
    wav_io = BytesIO()
    audio_segment.export(wav_io, format="wav")
    wav_io.seek(0)  # Rewind so the reader starts at the WAV header.

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_io) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
# Expose transcribe_audio through a simple web UI: one audio input, one text
# output.
iface = gr.Interface(
    transcribe_audio,
    "audio",
    "text",
    description="Upload an audio file and get the transcribed text.",
    title="Voice to Text Converter",
)

# Start serving the UI.
iface.launch()