import whisper
import os
from gtts import gTTS
import gradio as gr
from groq import Groq
from datetime import datetime  # Import datetime to handle timestamps
from IPython.display import HTML  # Notebook-only helper used by set_background()

# Load a smaller Whisper model for faster processing
try:
    model = whisper.load_model("tiny")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None

# Set up the Groq API client using an environment variable
GROQ_API_TOKEN = os.getenv("GROQ_API")
if not GROQ_API_TOKEN:
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
client = Groq(api_key=GROQ_API_TOKEN)

# Initialize the chat history
chat_history = []

# Get the LLM response from Groq, with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"
    try:
        chat_completion = client.chat.completions.create(
            # Send the role-specific prompt (previously this passed the raw
            # user_input, which silently ignored the role argument)
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",  # Replace with your desired model
            timeout=20,  # 20-second timeout
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."

# Convert text to speech using gTTS
def text_to_speech(text):
    try:
        tts = gTTS(text)
        output_audio = "output_audio.mp3"
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return None

# Main chatbot function: handles audio input and output, and maintains the chat history.
# Returns exactly two values to match the two Gradio output components below
# (the original also returned chat_history, which Gradio had no component for).
def chatbot(audio):
    if not model:
        return "Error: Whisper model is not available.", None
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None
    try:
        # Step 1: Transcribe the audio using Whisper
        result = model.transcribe(audio)
        user_text = result.get("text", "")
        if not user_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None

        # Get the current timestamp
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Record the transcription in the chat history
        chat_history.append((timestamp, "User", user_text))

        # Step 2: Get the LLM response from Groq
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech
        output_audio = text_to_speech(response_text)

        # Append the latest interaction to the chat history
        chat_history.append((timestamp, "Chatbot", response_text))

        # Format the chat history for display with timestamps and clear labels
        formatted_history = "\n".join(
            f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history
        )
        return formatted_history, output_audio
    except Exception as e:
        print(f"Error in chatbot function: {e}")
        return "Sorry, there was an error processing your request.", None

# Inject custom CSS for background and styling (notebook-only helper).
# NOTE: the original CSS markup was lost; place your <style>...</style>
# block inside the triple-quoted string below.
def set_background():
    return HTML(''' ''')

# Display custom background styling
set_background()

# Gradio interface for real-time interaction with chat history display
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Stylish Audio Chatbot with Groq API",
    description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
    theme="default",
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
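# --- How to run (a suggested setup; the filename and exact package versions are
# assumptions, so adjust for your environment) ---
#   pip install -U openai-whisper gTTS gradio groq
#   export GROQ_API="your-groq-api-key"   # read above via os.getenv("GROQ_API")
#   python app.py                         # "app.py" is a placeholder filename
# Whisper also requires the ffmpeg binary on the system (e.g., apt install ffmpeg).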
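# Optional sanity check (a sketch, assuming the Groq client above is configured):
# run these lines in a REPL to exercise the text pipeline without any audio input.
#   print(get_llm_response("Say hello in one sentence."))
#   print(text_to_speech("Hello!"))  # writes output_audio.mp3 and returns its path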