# HuggingFace Space app: Urdu speech-to-text (Whisper) with a gTTS voice reply.
# Requires third-party packages: gradio, openai-whisper, gTTS, soundfile, pydub
# (install with: pip install gradio openai-whisper gTTS soundfile pydub)
import gradio as gr
import whisper
from gtts import gTTS
import soundfile as sf
import numpy as np
import io
import tempfile
import os
from pydub import AudioSegment
# Load the Whisper speech-recognition model once at startup.
# "medium" trades accuracy for a sizeable download/VRAM cost; the first call
# downloads the weights, so this line can take a while on a cold start.
whisper_model = whisper.load_model("medium")
# Define transcription function
def transcribe_and_generate_response(audio_input):
    """Transcribe an uploaded audio file (Urdu) and synthesize a spoken reply.

    Parameters
    ----------
    audio_input : str | None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``),
        or ``None`` when nothing was recorded/uploaded.

    Returns
    -------
    tuple[str, str | None]
        ``(transcription_or_error_message, path_to_response_audio_or_None)``.
        Errors are reported in the first element rather than raised, so the
        Gradio UI always gets something to display.
    """
    temp_audio_path = None
    try:
        if audio_input is None:
            return "No audio input detected.", None

        # Normalize whatever container/codec the user uploaded to WAV so
        # Whisper's loader can always read it.  Use a context manager so the
        # temp file's descriptor is closed (the original leaked it).
        audio = AudioSegment.from_file(audio_input)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
            temp_audio_path = wav_file.name
        audio.export(temp_audio_path, format="wav")

        # Transcribe, forcing Urdu decoding.
        result = whisper_model.transcribe(temp_audio_path, language="ur")
        transcription = result.get('text')
        if transcription is None:
            return "Transcription failed.", None

        # Get a reply from a chatbot model here (replace with your chatbot logic)
        chatbot_response = f"Your input was: {transcription}"  # Replace with actual chatbot response logic

        # Text-to-speech with gTTS.  BUG FIX: gTTS produces MP3 data; the
        # original reinterpreted those compressed bytes as raw int16 PCM and
        # wrote them with soundfile at a made-up 22050 Hz rate, yielding
        # noise.  Save the MP3 directly instead — Gradio's filepath Audio
        # component plays MP3 natively.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
            response_audio_path = mp3_file.name
        tts = gTTS(text=chatbot_response, lang="ur")
        tts.save(response_audio_path)

        return transcription, response_audio_path
    except Exception as e:
        # Boundary handler: surface the failure in the UI instead of crashing
        # the Gradio worker.
        return f"An error occurred during processing: {str(e)}", None
    finally:
        # Best-effort cleanup of the intermediate WAV; the response file must
        # survive so Gradio can serve it.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass
# Assemble the Gradio UI: one audio upload in, transcription text plus a
# spoken chatbot reply out.
with gr.Blocks() as demo:
    # Input/output widgets.  Both audio components work with file paths so
    # the handler can pass temp-file locations around.
    uploaded_audio = gr.Audio(type="filepath", label="Upload Audio")
    text_out = gr.Textbox(label="Transcription")
    reply_audio = gr.Audio(label="Chatbot Response", type="filepath")

    # Wire the submit button to the processing function.
    run_button = gr.Button("Submit")
    run_button.click(
        transcribe_and_generate_response,
        inputs=uploaded_audio,
        outputs=[text_out, reply_audio],
    )

demo.launch()