import os
from datetime import datetime  # For timestamping chat history entries

import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq

# Load a smaller Whisper model for faster processing
try:
    model = whisper.load_model("tiny")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None
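
# Quick sanity check, left commented out so it never runs on startup.
# "sample.wav" is a hypothetical file name: openai-whisper's transcribe()
# takes a path to an audio file and returns a dict whose "text" key holds
# the transcription.
# if model is not None:
#     print(model.transcribe("sample.wav").get("text", ""))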

# Set up Groq API client using environment variable
GROQ_API_TOKEN = os.getenv("GROQ_API")
if not GROQ_API_TOKEN:
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")

client = Groq(api_key=GROQ_API_TOKEN)
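
# The token is read from the environment rather than hard-coded. For example,
# in a shell (placeholder value): export GROQ_API="gsk_..."
# On Hugging Face Spaces, the same value can be stored as a repository secret.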

# Initialize the chat history as a list of (timestamp, speaker, text) tuples
chat_history = []

# Function to get the LLM response from Groq with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"

    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],  # Send the role-specific prompt, not the raw input
            model="llama3-8b-8192",  # Replace with your desired model
            timeout=20,  # Increased timeout to 20 seconds
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."

# Function to convert text to speech using gTTS
def text_to_speech(text):
    try:
        tts = gTTS(text)
        output_audio = "output_audio.mp3"
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return None
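
# gTTS defaults to English; a language can be requested explicitly with,
# e.g., gTTS(text, lang="en"). The output file is overwritten on every call,
# which is acceptable for a single-user demo but not for concurrent sessions.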

# Main chatbot function to handle audio input and output with chat history.
# It returns one value per Gradio output component: the formatted history
# string for the Textbox and an audio file path (or None) for the Audio player.
def chatbot(audio):
    if not model:
        return "Error: Whisper model is not available.", None
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None

    try:
        # Step 1: Transcribe the audio using Whisper
        result = model.transcribe(audio)
        user_text = result.get("text", "")
        if not user_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None

        # Get current timestamp
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Record the transcription in the chat history
        chat_history.append((timestamp, "User", user_text))

        # Step 2: Get LLM response from Groq
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech
        output_audio = text_to_speech(response_text)

        # Append the latest interaction to the chat history
        chat_history.append((timestamp, "Chatbot", response_text))

        # Format the chat history for display with timestamps and clear labels
        formatted_history = "\n".join(f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history)
        return formatted_history, output_audio
    except Exception as e:
        print(f"Error in chatbot function: {e}")
        return "Sorry, there was an error processing your request.", None

# Custom CSS for background and styling. In a standalone script the styling
# must be passed to Gradio via the `css` argument below; IPython's HTML()
# display only takes effect inside notebooks.
custom_css = '''
body {
    background-image: url("https://raw.githubusercontent.com/username/repository/main/path/to/your-image.png");
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    color: white;
    font-family: Arial, sans-serif;
}
.gradio-container {
    background-color: rgba(0, 0, 0, 0.6);
    padding: 20px;
    border-radius: 8px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}
h1, h2, p, .gradio-label {
    color: #FFD700; /* Gold color for labels and titles */
}
.gradio-button {
    background-color: #FFD700;
    color: black;
    border-radius: 4px;
    font-weight: bold;
}
.gradio-input {
    background-color: rgba(255, 255, 255, 0.9);
    border-radius: 4px;
}
'''

# Gradio interface for real-time interaction with chat history display
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Stylish Audio Chatbot with Groq API",
    description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
    theme="default",
    css=custom_css,
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
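
# To expose the app beyond localhost (e.g. from a notebook or a remote
# machine), Gradio also supports iface.launch(share=True).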