import os
from datetime import datetime  # For timestamping chat history entries

import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq

# Load a smaller Whisper model for faster processing
try:
    model = whisper.load_model("tiny")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None
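
# Quick sanity check, left commented out so it never runs on startup.
# "sample.wav" is a hypothetical file name: openai-whisper's transcribe()
# takes a path to an audio file and returns a dict whose "text" key holds
# the transcription.
# if model is not None:
#     print(model.transcribe("sample.wav").get("text", ""))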

# Set up Groq API client using environment variable
GROQ_API_TOKEN = os.getenv("GROQ_API")
if not GROQ_API_TOKEN:
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")

client = Groq(api_key=GROQ_API_TOKEN)
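
# The token is read from the environment rather than hard-coded. For example,
# in a shell (placeholder value): export GROQ_API="gsk_..."
# On Hugging Face Spaces, the same value can be stored as a repository secret.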

# Initialize the chat history as a list of (timestamp, speaker, text) tuples
chat_history = []

# Function to get the LLM response from Groq with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"

    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],  # Send the role-specific prompt, not the raw input
            model="llama3-8b-8192",  # Replace with your desired model
            timeout=20,  # Increased timeout to 20 seconds
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."

# Function to convert text to speech using gTTS
def text_to_speech(text):
    try:
        tts = gTTS(text)
        output_audio = "output_audio.mp3"
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return None
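
# gTTS defaults to English; a language can be requested explicitly with,
# e.g., gTTS(text, lang="en"). The output file is overwritten on every call,
# which is acceptable for a single-user demo but not for concurrent sessions.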

# Main chatbot function to handle audio input and output with chat history.
# It returns one value per Gradio output component: the formatted history
# string for the Textbox and an audio file path (or None) for the Audio player.
def chatbot(audio):
    if not model:
        return "Error: Whisper model is not available.", None
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None

    try:
        # Step 1: Transcribe the audio using Whisper
        result = model.transcribe(audio)
        user_text = result.get("text", "")
        if not user_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None

        # Get current timestamp
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Record the transcription in the chat history
        chat_history.append((timestamp, "User", user_text))

        # Step 2: Get LLM response from Groq
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech
        output_audio = text_to_speech(response_text)

        # Append the latest interaction to the chat history
        chat_history.append((timestamp, "Chatbot", response_text))

        # Format the chat history for display with timestamps and clear labels
        formatted_history = "\n".join(f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history)
        return formatted_history, output_audio
    except Exception as e:
        print(f"Error in chatbot function: {e}")
        return "Sorry, there was an error processing your request.", None

# Custom CSS for background and styling. In a standalone script the styling
# must be passed to Gradio via the `css` argument below; IPython's HTML()
# display only takes effect inside notebooks.
custom_css = '''
body {
    background-image: url("https://raw.githubusercontent.com/username/repository/main/path/to/your-image.png");
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    color: white;
    font-family: Arial, sans-serif;
}
.gradio-container {
    background-color: rgba(0, 0, 0, 0.6);
    padding: 20px;
    border-radius: 8px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}
h1, h2, p, .gradio-label {
    color: #FFD700; /* Gold color for labels and titles */
}
.gradio-button {
    background-color: #FFD700;
    color: black;
    border-radius: 4px;
    font-weight: bold;
}
.gradio-input {
    background-color: rgba(255, 255, 255, 0.9);
    border-radius: 4px;
}
'''

# Gradio interface for real-time interaction with chat history display
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Stylish Audio Chatbot with Groq API",
    description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
    theme="default",
    css=custom_css,
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
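
# To expose the app beyond localhost (e.g. from a notebook or a remote
# machine), Gradio also supports iface.launch(share=True).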