Spaces:
Running
Running
import gradio as gr | |
import speech_recognition as sr | |
from io import BytesIO | |
from pydub import AudioSegment | |
def _pcm_to_wav(sample_rate, samples):
    """Wrap raw PCM samples in an in-memory WAV container.

    Args:
        sample_rate: Sampling frequency of ``samples`` in Hz.
        samples: Array-like object exposing the ndarray API (``dtype``,
            ``ndim``, ``shape``, ``astype``, ``clip``, ``tobytes``), laid out
            as ``(frames,)`` or ``(frames, channels)``.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds a complete WAV file.

    Raises:
        ValueError: If the array rank or sample dtype cannot be stored as
            PCM WAV data.
    """
    import wave  # Local import: the file's top-level imports do not include it.

    if samples.ndim not in (1, 2):
        raise ValueError(f"Unsupported audio array shape: {samples.shape}")

    dtype = samples.dtype
    if dtype.kind == "f":
        # Assumes float audio is normalised to [-1.0, 1.0]; rescale it to
        # 16-bit PCM so the WAV reader downstream can consume it.
        samples = (samples.clip(-1.0, 1.0) * 32767).astype("<i2")
    elif dtype.kind == "i" and dtype.itemsize in (2, 4):
        # WAV stores multi-byte samples little-endian.
        samples = samples.astype(f"<i{dtype.itemsize}")
    elif not (dtype.kind == "u" and dtype.itemsize == 1):
        # 8-bit unsigned is the only other layout written as-is.
        raise ValueError(f"Unsupported audio sample dtype: {dtype}")

    channels = 1 if samples.ndim == 1 else samples.shape[1]
    wav_io = BytesIO()
    with wave.open(wav_io, "wb") as writer:
        writer.setnchannels(channels)
        writer.setsampwidth(samples.dtype.itemsize)
        writer.setframerate(int(sample_rate))
        # C-order bytes of a (frames, channels) array are interleaved frames.
        writer.writeframes(samples.tobytes())
    wav_io.seek(0)  # Rewind so the consumer reads from the WAV header.
    return wav_io


def transcribe_audio(audio_input):
    """Transcribe an audio clip to text with Google Speech Recognition.

    Args:
        audio_input: A 2-tuple whose second item is the audio payload. The
            payload may be either an array of raw PCM samples (in which case
            the first item is used as the sample rate in Hz) or the
            ``bytes`` of an encoded audio file that pydub can decode.

    Returns:
        The recognised text, or a human-readable message when the service
        cannot understand the audio or cannot be reached.

    Raises:
        ValueError: If ``audio_input`` is not a 2-tuple, or its payload is
            neither encoded bytes nor a supported sample array.
    """
    # Validate first so bad input fails fast, before any decoding work.
    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
    sample_rate, payload = audio_input

    if isinstance(payload, (bytes, bytearray)):
        # Encoded audio file: let pydub decode it and re-export as WAV.
        audio_segment = AudioSegment.from_file(BytesIO(payload))
        wav_io = BytesIO()
        audio_segment.export(wav_io, format="wav")
        wav_io.seek(0)  # Move to the beginning of the file-like object
    elif hasattr(payload, "dtype") and hasattr(payload, "tobytes"):
        # Raw sample array: it has no container header, so build the WAV
        # directly instead of asking pydub to guess a format.
        wav_io = _pcm_to_wav(sample_rate, payload)
    else:
        raise ValueError(
            "Expected the audio payload to be encoded bytes or an array of samples."
        )

    recognizer = sr.Recognizer()
    # Load the audio from the in-memory WAV file
    with sr.AudioFile(wav_io) as source:
        audio_data = recognizer.record(source)

    try:
        # Only the network call is guarded; everything else should surface.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
# Build the web UI: one audio input wired to the transcriber, one text output.
iface = gr.Interface(
    transcribe_audio,
    "audio",
    "text",
    title="Voice to Text Converter",
    description="Upload an audio file and get the transcribed text.",
)

# Start serving the interface.
iface.launch()