"""Gradio chat app: Gemini writes replies as "Ath" and ElevenLabs voices them."""

import logging
import os
import tempfile

import google.generativeai as genai
import gradio as gr
import requests

logger = logging.getLogger(__name__)

# Configure your Google Generative AI API key
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Create the model
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config=generation_config,
    system_instruction=(
        "You are a human, living on Earth. "
        "You speak friendly and use casual language. "
        "You never say you're an AI or program. "
        "Your name is Ath, and you always speak like a 100% human."
    ),
)

# NOTE(review): this single chat session is created once at import time and is
# used by every call to chat_and_tts, so all visitors share one model-side
# conversation even though the visible history (gr.State) is per browser
# session. Consider creating one session per user.
chat_session = model.start_chat(history=[])

# Eleven Labs API configuration
eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"

# Upper bound (seconds) on the text-to-speech HTTP call so a stalled
# connection cannot block a Gradio worker forever.
TTS_TIMEOUT_SECONDS = 30


def _synthesize_speech(text):
    """Convert *text* to speech with ElevenLabs and save it as an MP3 file.

    Args:
        text: The text to be spoken.

    Returns:
        Path of a newly created temporary ``.mp3`` file, or ``None`` when the
        API key is not configured, *text* is empty, the request fails, or the
        service answers with a non-200 status.
    """
    # Without a key the header value would be None, which requests rejects.
    if not eleven_labs_api_key or not text:
        return None

    payload = {
        "text": text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0,
        },
    }
    headers = {
        "xi-api-key": eleven_labs_api_key,
        "Content-Type": "application/json",
    }

    try:
        tts_response = requests.post(
            eleven_labs_url,
            json=payload,
            headers=headers,
            timeout=TTS_TIMEOUT_SECONDS,
        )
    except requests.RequestException:
        # Speech is best-effort: the text reply must still reach the user.
        logger.exception("Text-to-speech request failed")
        return None

    if tts_response.status_code != 200:
        logger.warning(
            "Text-to-speech request returned HTTP %s", tts_response.status_code
        )
        return None

    # A unique file per reply keeps concurrent users from overwriting each
    # other's audio. The file is not deleted here because Gradio reads it
    # after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(tts_response.content)
    return audio_file.name


def chat_and_tts(user_input, history):
    """Send the user's message to the model and voice the reply.

    Args:
        user_input: Text typed by the user.
        history: List of ``(user_message, reply)`` tuples for this session.

    Returns:
        A ``(chatbot_history, state_history, audio_path)`` tuple. The first
        two items are the same updated history list; ``audio_path`` is the
        MP3 file path of the spoken reply, or ``None`` if no audio was made.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    # Nothing to send: leave the conversation untouched.
    if not user_input or not user_input.strip():
        return history, history, None

    # Send the user's input to the chat session
    response = chat_session.send_message(user_input)
    response_text = response.text

    audio_path = _synthesize_speech(response_text)

    # Update the chat history
    history.append((user_input, response_text))

    return history, history, audio_path


# Create the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Chat with Ath")
    gr.Markdown(
        "Ask any question and get a friendly response from Ath. "
        "The response will also be converted to speech."
    )

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(
                placeholder="Ask me anything...", label="Your Question"
            )
            submit_btn = gr.Button("Send")

        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Per-session conversation history shown in the chatbot.
    state = gr.State([])

    submit_btn.click(
        chat_and_tts,
        inputs=[user_input, state],
        outputs=[chatbot, state, audio_output],
    )

demo.launch()