# Hugging Face Space: abdullahzunorain/voice-to-voice-Chatbot (status: Sleeping; file size: 10,868 bytes)

# import whisper
# import os
# from gtts import gTTS
# import gradio as gr
# from groq import Groq
# from datetime import datetime
# import tempfile

# # Load a smaller Whisper model for faster processing
# try:
#     model = whisper.load_model("tiny")
# except Exception as e:
#     print(f"Error loading Whisper model: {e}")
#     model = None

# # Set up Groq API client using environment variable
# GROQ_API_TOKEN = os.getenv("GROQ_API")
# if not GROQ_API_TOKEN:
#     raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
# client = Groq(api_key=GROQ_API_TOKEN)

# # Initialize the chat history
# chat_history = []

# # Function to get the LLM response from Groq with timeout handling
# def get_llm_response(user_input, role="detailed responder"):
#     prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}" if role == "expert" else \
#              f"As a good assistant, provide a clear, concise, and helpful response: {user_input}" if role == "good assistant" else \
#              f"Provide a thorough and detailed response: {user_input}"

#     try:
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": user_input}],
#             model="llama3-8b-8192",  # Replace with your desired model
#             timeout=20  # Increased timeout to 20 seconds
#         )
#         return chat_completion.choices[0].message.content
#     except Exception as e:
#         print(f"Error during LLM response retrieval: {e}")
#         return "Sorry, there was an error retrieving the response. Please try again."

# # Function to convert text to speech using gTTS and handle temporary files
# def text_to_speech(text):
#     try:
#         tts = gTTS(text)
#         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
#             output_audio = temp_file.name
#             tts.save(output_audio)
#         return output_audio
#     except Exception as e:
#         print(f"Error generating TTS: {e}")
#         return None

# # Main chatbot function to handle audio input and output with chat history
# def chatbot(audio):
#     if not model:
#         return "Error: Whisper model is not available.", None, chat_history

#     if not audio:
#         return "No audio provided. Please upload a valid audio file.", None, chat_history

#     try:
#         # Step 1: Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         user_text = result.get("text", "")
#         if not user_text.strip():
#             return "Could not understand the audio. Please try speaking more clearly.", None, chat_history

#         # Get current timestamp
#         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

#         # Display transcription in chat history
#         chat_history.append((timestamp, "User", user_text))

#         # Step 2: Get LLM response from Groq
#         response_text = get_llm_response(user_text)

#         # Step 3: Convert the response text to speech
#         output_audio = text_to_speech(response_text)

#         # Append the latest interaction to the chat history
#         chat_history.append((timestamp, "Chatbot", response_text))

#         # Format the chat history for display with timestamps and clear labels
#         formatted_history = "\n".join([f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history])
        
#         return formatted_history, output_audio, chat_history

#     except Exception as e:
#         print(f"Error in chatbot function: {e}")
#         return "Sorry, there was an error processing your request.", None, chat_history

# # Gradio interface for real-time interaction with chat history display
# iface = gr.Interface(
#     fn=chatbot,
#     inputs=gr.Audio(type="filepath"),
#     outputs=[
#         gr.Textbox(label="Chat History", lines=10, interactive=False),  # Display chat history
#         gr.Audio(type="filepath", label="Response Audio"),
#     ],
#     live=True,
#     title="Voice to Voice Chatbot",
#     description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
#     theme="default",
#     css=''' 
#         body {
#             background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg");
#             background-size: cover;
#             background-position: center;
#             background-repeat: no-repeat;
#             color: white;
#             font-family: 'Helvetica Neue', sans-serif;
#         }
#         .gradio-container {
#             background-color: rgba(0, 0, 0, 0.7);
#             padding: 20px;
#             border-radius: 10px;
#             box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5);
#         }
#         h1, h2, p, .gradio-label {
#             color: #FFD700;  /* Gold color for labels and titles */
#             text-align: center;
#         }
#         .gradio-button {
#             background-color: #FFD700;
#             color: black;
#             border-radius: 5px;
#             font-weight: bold;
#             transition: background-color 0.3s, transform 0.2s;
#         }
#         .gradio-button:hover {
#             background-color: #FFC107;  /* Lighter gold on hover */
#             transform: scale(1.05);
#         }
#         .gradio-input {
#             background-color: rgba(255, 255, 255, 0.9);
#             border-radius: 4px;
#             border: 2px solid #FFD700; /* Gold border */
#         }
#         .gradio-audio {
#             border: 2px solid #FFD700; /* Gold border for audio */
#         }
#     '''
# )

# # Launch the Gradio app
# if __name__ == "__main__":
#     iface.launch()










import whisper
import os
from gtts import gTTS
import gradio as gr
from groq import Groq
from datetime import datetime
import tempfile

# Speech-to-text backend: the "tiny" Whisper checkpoint is used for faster
# processing. A load failure is printed and leaves `model` set to None
# instead of aborting startup.
try:
    model = whisper.load_model("tiny")
except Exception as load_error:
    print(f"Error loading Whisper model: {load_error}")
    model = None

# The Groq client is authenticated with the token stored in the GROQ_API
# environment variable; startup stops with ValueError when it is unset or empty.
GROQ_API_TOKEN = os.environ.get("GROQ_API")
if GROQ_API_TOKEN is None or GROQ_API_TOKEN == "":
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
client = Groq(api_key=GROQ_API_TOKEN)

# Module-level conversation log: a list of (timestamp, speaker, text) tuples
# appended to by `chatbot`.
chat_history = []

# Function to get the LLM response from Groq with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    """Ask the Groq chat model to answer ``user_input`` and return its reply.

    The user's text is prefixed with an instruction selected by ``role`` and
    the combined prompt is sent as a single user message.

    Args:
        user_input: Text to answer (in this app, the Whisper transcription).
        role: Response style. ``"expert"`` and ``"good assistant"`` select
            their own instruction; any other value, including the default,
            asks for a thorough and detailed response.

    Returns:
        The message content of the first completion choice, or a fixed
        apology string when the request fails for any reason.
    """
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"

    try:
        chat_completion = client.chat.completions.create(
            # Send the role-specific prompt; previously the raw user_input was
            # sent and the prompt built above was never used.
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",  # Replace with your desired model
            timeout=20  # Give up on the request after 20 seconds
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort boundary: the caller always gets a string it can display
        # and speak, never an exception.
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."

# Function to convert text to speech using gTTS and handle temporary files
def text_to_speech(text):
    """Synthesize ``text`` to an MP3 file with gTTS and return the file path.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.mp3`` temporary file, or ``None`` when
        ``text`` is not a non-blank string or synthesis fails. The file is
        created with ``delete=False``, so the caller owns its cleanup.
    """
    # Nothing to speak: skip gTTS instead of relying on it to raise.
    if not isinstance(text, str) or not text.strip():
        return None

    output_audio = None
    try:
        tts = gTTS(text)
        # Only reserve a unique path here. The handle is closed before gTTS
        # writes to it, because a NamedTemporaryFile that is still open cannot
        # be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_audio = temp_file.name
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        # delete=False means nothing else removes the file, so drop the
        # empty or partial file left behind by a failed save.
        if output_audio is not None:
            try:
                os.remove(output_audio)
            except OSError:
                pass  # Cleanup is best-effort; the error was already reported.
        return None

# Main chatbot function to handle audio input and output with chat history
def chatbot(audio):
    """Run one voice turn: transcribe, ask the LLM, synthesize the reply.

    Args:
        audio: Value passed to ``model.transcribe``; the Gradio input is
            configured with ``type="filepath"``.

    Returns:
        A 3-tuple ``(display_text, reply_audio, chat_history)``:
        ``display_text`` is the formatted history on success or an error
        message otherwise, ``reply_audio`` is the value returned by
        ``text_to_speech`` (``None`` on every error path), and
        ``chat_history`` is the module-level history list.

    NOTE(review): the Interface in this file declares two output components
    while three values are returned here; confirm the third is intended.
    """
    # Guard clauses: nothing can be done without a model or an audio input.
    if not model:
        return "Error: Whisper model is not available.", None, chat_history
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None, chat_history

    try:
        # Step 1: speech -> text
        transcription = model.transcribe(audio)
        spoken_text = transcription.get("text", "")
        if not spoken_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None, chat_history

        # One timestamp is taken per turn and shared by both history entries.
        turn_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        chat_history.append((turn_stamp, "User", spoken_text))

        # Step 2: text -> LLM reply
        reply_text = get_llm_response(spoken_text)

        # Step 3: reply -> speech
        reply_audio = text_to_speech(reply_text)

        chat_history.append((turn_stamp, "Chatbot", reply_text))

        # Render the whole history, one "[time] speaker: text" line per entry.
        rendered_lines = []
        for when, who, said in chat_history:
            rendered_lines.append(f"[{when}] {who}: {said}")
        display_text = "\n".join(rendered_lines)

        return display_text, reply_audio, chat_history

    except Exception as err:
        print(f"Error in chatbot function: {err}")
        return "Sorry, there was an error processing your request.", None, chat_history

# Gradio interface for real-time interaction with chat history display.
# Input: an audio clip handed to `chatbot` as a file path.
# Outputs: the chat history text and the synthesized response audio.
# NOTE(review): `chatbot` returns a third value (the history list) that has no
# matching output component here; confirm how the installed Gradio version
# treats the extra value.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Voice to Voice Chatbot",
    description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
    theme="default",
    # Only valid CSS belongs in this string: '#' does not start a comment in
    # CSS, so the rules that had been disabled with '#' prefixes produced
    # invalid CSS and have been removed.
    css='''
        body {
            background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg");
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
            color: white;
            font-family: Arial, sans-serif;
        }
    '''
)

# Launch the Gradio app only when this file is run as a script
if __name__ == "__main__":
    iface.launch()