# Hugging Face Space — build status: Build error
# NOTE(review): "!pip install pydub" is IPython/notebook shell syntax and a
# SyntaxError in a plain .py file — declare pydub in requirements.txt instead.
# !pip install pydub

# Standard library
import io
import os
import tempfile

# Third-party
import gradio as gr
import numpy as np
import soundfile as sf
import whisper
from gtts import gTTS
from pydub import AudioSegment

# Load the Whisper speech-to-text model once at module import so every
# request reuses it (loading per-request would be prohibitively slow).
whisper_model = whisper.load_model("medium")
# Transcribe the uploaded audio and synthesize a spoken reply.
def transcribe_and_generate_response(audio_input):
    """Transcribe an uploaded audio file with Whisper (Urdu) and return a
    spoken (gTTS) chatbot reply.

    Parameters
    ----------
    audio_input : str | None
        Filepath of the uploaded audio (any container pydub/ffmpeg can read),
        or None when nothing was uploaded.

    Returns
    -------
    tuple[str, str | None]
        (transcription text or an error message,
         filepath of the response audio or None on failure).
    """
    temp_wav_path = None
    try:
        if audio_input is None:
            return "No audio input detected.", None

        # Normalize the input to WAV so Whisper can read any uploaded format.
        audio = AudioSegment.from_file(audio_input)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_wav_path = f.name
        audio.export(temp_wav_path, format="wav")

        result = whisper_model.transcribe(temp_wav_path, language="ur")
        transcription = result.get('text')
        if not transcription:
            return "Transcription failed.", None

        # Get a reply from a chatbot model here (replace with your chatbot logic)
        chatbot_response = f"Your input was: {transcription}"

        # BUG FIX: gTTS produces MP3 bytes. The original code reinterpreted
        # those MP3 bytes as raw int16 PCM via np.frombuffer and wrote them
        # as a 22050 Hz WAV with soundfile, yielding unplayable garbage.
        # Instead, save the MP3 as-is; Gradio's Audio component plays MP3.
        tts = gTTS(text=chatbot_response, lang="ur")
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            response_audio_path = f.name
        tts.save(response_audio_path)

        return transcription, response_audio_path
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        return f"An error occurred during processing: {str(e)}", None
    finally:
        # Remove the intermediate WAV; the response file is handed to Gradio.
        if temp_wav_path and os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
# --- Gradio interface: upload audio, show transcription, play spoken reply ---
with gr.Blocks() as demo:
    uploaded_audio = gr.Audio(type="filepath", label="Upload Audio")
    transcript_box = gr.Textbox(label="Transcription")
    reply_audio = gr.Audio(label="Chatbot Response", type="filepath")
    run_button = gr.Button("Submit")
    run_button.click(
        transcribe_and_generate_response,
        inputs=uploaded_audio,
        outputs=[transcript_box, reply_audio],
    )

demo.launch()