import os import requests import google.generativeai as genai import gradio as gr from tempfile import NamedTemporaryFile # Configure your Google Generative AI API key genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # Create the model generation_config = { "temperature": 1, "top_p": 0.95, "top_k": 64, "max_output_tokens": 8192, "response_mime_type": "text/plain", } model = genai.GenerativeModel( model_name="gemini-1.5-pro", generation_config=generation_config, system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.", ) chat_session = model.start_chat(history=[]) # Eleven Labs API configuration eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY") eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF" def chat_and_tts_text(user_input, history): try: # Send the user's text input to the chat session response = chat_session.send_message(user_input) response_text = response.text # Update the chat history with text input and response history.append((user_input, response_text)) return history, history except Exception as e: return history, f"Error: {str(e)}" def convert_audio_to_text(audio_file): # This is a placeholder function. Replace with actual implementation. # For now, we assume the function just returns a dummy text. return "Sample text from audio" def chat_and_tts_audio(audio_file, history): try: # Convert uploaded audio file to text user_input = convert_audio_to_text(audio_file) # Send the user's audio input to the chat session response = chat_session.send_message(user_input) response_text = response.text # Eleven Labs text-to-speech request payload payload = { "text": response_text, "voice_settings": { "stability": 0, "similarity_boost": 0 } } headers = { "xi-api-key": eleven_labs_api_key, "Content-Type": "application/json" } # Make the request to Eleven Labs API tts_response = requests.post(eleven_labs_url, json=payload, headers=headers) # Check if the response is successful and save the audio content to a temporary file if tts_response.status_code == 200: with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio: temp_audio.write(tts_response.content) audio_path = temp_audio.name else: audio_path = None # Update the chat history with audio input and response history.append((user_input, response_text)) return history, history, audio_path except Exception as e: return history, f"Error: {str(e)}", None # Create the Gradio UI with gr.Blocks() as demo: gr.Markdown("