Spaces:

arshadrana
/

voice-to-text

Sleeping

voice-to-text / app.py

Update app.py

da8d82c verified 8 months ago

1.58 kB

	import gradio as gr
	import speech_recognition as sr
	from io import BytesIO
	from pydub import AudioSegment

	def _segment_from_samples(sample_rate, samples):
	    """Wrap raw PCM samples (a NumPy-style array) in a pydub ``AudioSegment``.

	    ``samples`` is expected to be 1-D (mono) or 2-D shaped
	    ``(frames, channels)``, the layout Gradio documents for its
	    ``type="numpy"`` audio payload.
	    """
	    if samples.dtype.kind == "f":
	        # pydub needs integer PCM. Assumes float samples are normalised to
	        # [-1.0, 1.0] -- TODO confirm against the Gradio version in use.
	        samples = (samples.clip(-1.0, 1.0) * 32767).astype("int16")

	    channel_count = 1 if samples.ndim == 1 else samples.shape[1]
	    return AudioSegment(
	        data=samples.tobytes(),  # C-order bytes, i.e. interleaved frames
	        sample_width=samples.dtype.itemsize,
	        frame_rate=int(sample_rate),
	        channels=channel_count,
	    )


	def transcribe_audio(audio_input):
	    """Transcribe speech from a Gradio audio payload using Google's recognizer.

	    Args:
	        audio_input: A 2-tuple. With the default ``"audio"`` input Gradio
	            supplies ``(sample_rate, samples)`` where ``samples`` is a
	            NumPy array of PCM data. A tuple whose second item is the
	            encoded contents of an audio file (bytes) is also accepted.

	    Returns:
	        The recognized text, or a human-readable message when the speech
	        could not be understood or the recognition service request failed.

	    Raises:
	        ValueError: If ``audio_input`` is not a 2-tuple.
	    """
	    # Validate before doing any work so bad input fails fast.
	    if not (isinstance(audio_input, tuple) and len(audio_input) == 2):
	        raise ValueError("Expected audio_input to be a tuple with audio data bytes.")
	    sample_rate, payload = audio_input

	    if hasattr(payload, "dtype") and hasattr(payload, "tobytes"):
	        # Raw samples carry no container header, so pydub cannot sniff a
	        # format from them; describe the PCM layout explicitly instead.
	        audio_segment = _segment_from_samples(sample_rate, payload)
	    else:
	        # Encoded file contents: let pydub detect the format itself.
	        audio_segment = AudioSegment.from_file(BytesIO(payload))

	    # Re-encode as WAV in memory, a format sr.AudioFile can read.
	    wav_io = BytesIO()
	    audio_segment.export(wav_io, format="wav")
	    wav_io.seek(0)  # Rewind so the reader starts at the WAV header

	    recognizer = sr.Recognizer()
	    with sr.AudioFile(wav_io) as source:
	        audio_data = recognizer.record(source)

	    try:
	        return recognizer.recognize_google(audio_data)
	    except sr.UnknownValueError:
	        return "Google Speech Recognition could not understand audio"
	    except sr.RequestError as e:
	        return f"Could not request results from Google Speech Recognition service; {e}"

	# Build the web UI: one audio input wired to transcribe_audio, with the
	# transcription shown as plain text.
	iface = gr.Interface(
	    transcribe_audio,
	    "audio",
	    "text",
	    title="Voice to Text Converter",
	    description="Upload an audio file and get the transcribed text.",
	)

	# Start serving the interface.
	iface.launch()