# HuggingFace Space app: Urdu speech-to-text (Whisper) with a gTTS voice reply.
# Requires third-party packages: gradio, openai-whisper, gTTS, soundfile, pydub
# (install with: pip install gradio openai-whisper gTTS soundfile pydub)
import gradio as gr
import whisper
from gtts import gTTS
import soundfile as sf
import numpy as np
import io
import tempfile
import os
from pydub import AudioSegment
# Load the Whisper speech-recognition model once at startup.
# "medium" trades accuracy for a sizeable download/VRAM cost; the first call
# downloads the weights, so this line can take a while on a cold start.
whisper_model = whisper.load_model("medium")
# Define transcription function
def transcribe_and_generate_response(audio_input):
    """Transcribe an uploaded audio file (Urdu) and synthesize a spoken reply.

    Parameters
    ----------
    audio_input : str | None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``),
        or ``None`` when nothing was recorded/uploaded.

    Returns
    -------
    tuple[str, str | None]
        ``(transcription_or_error_message, path_to_response_audio_or_None)``.
        Errors are reported in the first element rather than raised, so the
        Gradio UI always gets something to display.
    """
    temp_audio_path = None
    try:
        if audio_input is None:
            return "No audio input detected.", None

        # Normalize whatever container/codec the user uploaded to WAV so
        # Whisper's loader can always read it.  Use a context manager so the
        # temp file's descriptor is closed (the original leaked it).
        audio = AudioSegment.from_file(audio_input)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
            temp_audio_path = wav_file.name
        audio.export(temp_audio_path, format="wav")

        # Transcribe, forcing Urdu decoding.
        result = whisper_model.transcribe(temp_audio_path, language="ur")
        transcription = result.get('text')
        if transcription is None:
            return "Transcription failed.", None

        # Get a reply from a chatbot model here (replace with your chatbot logic)
        chatbot_response = f"Your input was: {transcription}"  # Replace with actual chatbot response logic

        # Text-to-speech with gTTS.  BUG FIX: gTTS produces MP3 data; the
        # original reinterpreted those compressed bytes as raw int16 PCM and
        # wrote them with soundfile at a made-up 22050 Hz rate, yielding
        # noise.  Save the MP3 directly instead — Gradio's filepath Audio
        # component plays MP3 natively.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
            response_audio_path = mp3_file.name
        tts = gTTS(text=chatbot_response, lang="ur")
        tts.save(response_audio_path)

        return transcription, response_audio_path
    except Exception as e:
        # Boundary handler: surface the failure in the UI instead of crashing
        # the Gradio worker.
        return f"An error occurred during processing: {str(e)}", None
    finally:
        # Best-effort cleanup of the intermediate WAV; the response file must
        # survive so Gradio can serve it.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass
# Assemble the Gradio UI: one audio upload in, transcription text plus a
# spoken chatbot reply out.
with gr.Blocks() as demo:
    # Input/output widgets.  Both audio components work with file paths so
    # the handler can pass temp-file locations around.
    uploaded_audio = gr.Audio(type="filepath", label="Upload Audio")
    text_out = gr.Textbox(label="Transcription")
    reply_audio = gr.Audio(label="Chatbot Response", type="filepath")

    # Wire the submit button to the processing function.
    run_button = gr.Button("Submit")
    run_button.click(
        transcribe_and_generate_response,
        inputs=uploaded_audio,
        outputs=[text_out, reply_audio],
    )

demo.launch()