Voice-To-Text

Sleeping

File size: 1,501 Bytes

309b067
 
da8d82c
a562e5f
5f94e97
309b067
0fe9a40
309b067
fafa615
 
 
0fe9a40
fafa615
a562e5f
 
 
 
 
 
0fe9a40
a562e5f
5f94e97
 
 
a562e5f
 
309b067
a562e5f
 
 
 
309b067
 
 
 
 
a562e5f
309b067
 
 
 
 
 
 
 
0fe9a40

import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment
from io import BytesIO
import tempfile

def _segment_from_samples(sample_rate, samples):
    """Wrap an array of signed-integer PCM samples in a pydub ``AudioSegment``.

    Args:
        sample_rate: Frames per second of the recording.
        samples: Array-like object exposing ``dtype``, ``ndim``, ``shape`` and
            ``tobytes()`` (e.g. a NumPy array). One dimension is treated as
            mono; two dimensions are treated as ``(frames, channels)``.

    Returns:
        An ``AudioSegment`` holding the same PCM data.

    Raises:
        TypeError: If the samples are not 8-, 16- or 32-bit signed integers.
        ValueError: If the array has more than two dimensions.
    """
    dtype = samples.dtype
    if dtype.kind != "i" or dtype.itemsize not in (1, 2, 4):
        raise TypeError(f"Unsupported audio sample type: {dtype}")

    if samples.ndim == 1:
        channels = 1
    elif samples.ndim == 2:
        channels = samples.shape[1]
    else:
        raise ValueError(f"Unsupported audio array shape: {samples.shape}")

    # tobytes() emits row-major data, i.e. interleaved frames for 2-D input.
    return AudioSegment(
        data=samples.tobytes(),
        sample_width=dtype.itemsize,
        frame_rate=int(sample_rate),
        channels=channels,
    )


def transcribe_audio(audio_input):
    """Transcribe speech from a Gradio audio payload using Google's recognizer.

    Args:
        audio_input: A 2-tuple whose second item is the audio. If that item is
            bytes-like it is decoded as an MP3 file; otherwise it is treated as
            an array of PCM samples and the first item as its sample rate.

    Returns:
        The recognized text, or a human-readable error message if the audio
        could not be loaded, understood, or sent to the recognition service.

    Raises:
        ValueError: If ``audio_input`` is not a 2-tuple.
    """
    # Validate before doing any work so bad input fails fast.
    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
    sample_rate, payload = audio_input

    try:
        if isinstance(payload, (bytes, bytearray, memoryview)):
            audio_segment = AudioSegment.from_file(BytesIO(payload), format="mp3")
        else:
            # Raw samples must not go through a file decoder: they carry no
            # container/codec header, only PCM frames.
            audio_segment = _segment_from_samples(sample_rate, payload)
    except Exception as e:
        return f"Error loading audio file: {e}"

    # Convert to WAV in memory; a delete=False temp file would be left on disk
    # after every request.
    wav_buffer = BytesIO()
    audio_segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(wav_buffer) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"

# Web UI: one audio input fed to transcribe_audio, with the result shown as text.
iface = gr.Interface(
    transcribe_audio,
    "audio",
    "text",
    description="Upload an audio file and get the transcribed text.",
    title="Voice to Text Converter",
)

# Start serving the interface as soon as this module is executed.
iface.launch()