"""Gradio app that transcribes audio with Google Speech Recognition."""

import os
import tempfile
from io import BytesIO

import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment


def _segment_from_samples(sample_rate, samples):
    """Wrap raw PCM samples (a NumPy-style array) in an ``AudioSegment``.

    ``samples`` is expected to be shaped ``(frames,)`` or
    ``(frames, channels)``; only array methods are used, so NumPy itself
    does not need to be imported here.
    """
    if samples.dtype.kind == "f":
        # pydub needs integer PCM. NOTE(review): assumes float samples are
        # normalised to [-1.0, 1.0] -- confirm against the caller.
        samples = (samples.clip(-1.0, 1.0) * 32767).astype("int16")
    channels = 1 if samples.ndim == 1 else samples.shape[1]
    return AudioSegment(
        data=samples.tobytes(),
        sample_width=samples.dtype.itemsize,
        frame_rate=sample_rate,
        channels=channels,
    )


def _load_segment(audio_input):
    """Decode an already-validated ``audio_input`` into an ``AudioSegment``."""
    if isinstance(audio_input, (str, os.PathLike)):
        # No format hint: let the decoder detect the container itself so
        # WAV/MP3/etc. uploads all work.
        return AudioSegment.from_file(audio_input)

    sample_rate, payload = audio_input
    if isinstance(payload, (bytes, bytearray)):
        # Legacy contract: encoded bytes are treated as MP3, as before.
        return AudioSegment.from_file(BytesIO(payload), format="mp3")
    return _segment_from_samples(sample_rate, payload)


def transcribe_audio(audio_input) -> str:
    """Transcribe speech in ``audio_input`` to text.

    Args:
        audio_input: One of
            * a path (``str`` / ``os.PathLike``) to an audio file,
            * a ``(sample_rate, bytes)`` tuple holding MP3-encoded bytes,
            * a ``(sample_rate, samples)`` tuple holding a NumPy-style
              array of PCM samples.

    Returns:
        The recognised text, or a human-readable error message when the
        audio cannot be decoded, understood, or sent to the service.

    Raises:
        ValueError: If ``audio_input`` is none of the supported shapes
            (including ``None``).
    """
    is_path = isinstance(audio_input, (str, os.PathLike))
    is_pair = isinstance(audio_input, tuple) and len(audio_input) == 2
    if not (is_path or is_pair):
        raise ValueError(
            "Expected audio_input to be a file path or a "
            "(sample_rate, audio_data) tuple."
        )

    try:
        audio_segment = _load_segment(audio_input)
    except Exception as e:
        # Decoder failures are reported to the user rather than raised.
        return f"Error loading audio file: {e}"

    recognizer = sr.Recognizer()

    # The recogniser reads WAV, so convert via a scratch directory that is
    # removed automatically (the old delete=False temp file was leaked).
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_file_path = os.path.join(tmp_dir, "audio.wav")
        # export() returns the open output handle; close it explicitly.
        audio_segment.export(wav_file_path, format="wav").close()
        with sr.AudioFile(wav_file_path) as source:
            audio_data = recognizer.record(source)

    # The audio is fully in memory now; the network call needs no file.
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return (
            "Could not request results from Google Speech Recognition "
            f"service; {e}"
        )


# Gradio Interface
iface = gr.Interface(
    fn=transcribe_audio,
    # "filepath" hands the function a path to the uploaded/recorded file;
    # the default numpy mode passes raw samples, not encoded bytes.
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Voice to Text Converter",
    description="Upload an audio file and get the transcribed text.",
)

if __name__ == "__main__":
    iface.launch()