Spaces:

Artificial-superintelligence
/

ATHAI

Running

ATHAI / app.py

Ath

Update app.py

f58d77b verified about 1 year ago

3.85 kB

	import os
	import requests
	import google.generativeai as genai
	import gradio as gr
	from tempfile import NamedTemporaryFile
	from pydub import AudioSegment
	from pydub.exceptions import CouldntDecodeError

	# Eleven Labs text-to-speech: credentials come from the environment, the
	# endpoint targets one fixed voice id.
	eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
	eleven_labs_api_key = os.environ.get("ELEVEN_LABS_API_KEY")

	# Authenticate the Google Generative AI client with the key from the environment.
	genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

	# Sampling and output settings passed to the model.
	generation_config = {
	    "temperature": 1,
	    "top_p": 0.95,
	    "top_k": 64,
	    "max_output_tokens": 8192,
	    "response_mime_type": "text/plain",
	}

	# Gemini model; the persona is set through the system instruction.
	model = genai.GenerativeModel(
	    model_name="gemini-1.5-pro",
	    generation_config=generation_config,
	    system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
	)

	# A single chat session is created at import time and shared by the
	# request handlers in this file.
	chat_session = model.start_chat(history=[])

	def chat_and_tts_text(user_input, history):
	    """Send typed input to the shared chat session and record the exchange.

	    Args:
	        user_input: Text typed by the user.
	        history: List of ``(user_message, reply)`` pairs collected so far;
	            ``None`` is treated as an empty history.

	    Returns:
	        A ``(history, history)`` tuple. The click handler routes the first
	        value to the chatbot and the second to the state component, and
	        both must receive the list of pairs; returning the bare reply text
	        as the second value would turn the stored history into a string.
	    """
	    # Copy so the caller's list is never mutated behind its back.
	    history = list(history or [])

	    # Blank input is not forwarded to the model; the history is returned as is.
	    if not user_input or not user_input.strip():
	        return history, history

	    response = chat_session.send_message(user_input)
	    history.append((user_input, response.text))
	    return history, history

	def convert_audio_to_text(audio_file):
	    """Decode an uploaded audio file and re-encode it as WAV.

	    Despite the name, no speech-to-text happens here: the audio is decoded
	    with pydub and exported in WAV format.

	    Args:
	        audio_file: Path or file-like object handed to
	            ``AudioSegment.from_file``; ``None`` or an empty value means
	            nothing was uploaded.

	    Returns:
	        The file object returned by ``AudioSegment.export``, or ``None``
	        when no file was given or the file could not be read or decoded.
	    """
	    # Nothing uploaded: report it the same way as an undecodable file.
	    if not audio_file:
	        return None

	    try:
	        sound = AudioSegment.from_file(audio_file)
	    except (CouldntDecodeError, OSError):
	        # OSError covers an unreadable/missing file in addition to
	        # pydub's own decode failure.
	        return None

	    return sound.export(format="wav")

	def chat_and_tts_audio(audio_file):
	    """Answer an uploaded audio file with text and synthesized speech.

	    The upload is re-encoded to WAV, sent to the shared chat session as an
	    inline audio part, and the model's text reply is then posted to the
	    Eleven Labs text-to-speech endpoint.

	    Args:
	        audio_file: The uploaded audio, as accepted by
	            ``convert_audio_to_text``.

	    Returns:
	        A ``(response_text, audio_path)`` tuple. ``audio_path`` is the path
	        of an ``.mp3`` temporary file holding the synthesized reply, or
	        ``None`` when the upload could not be decoded or the
	        text-to-speech request failed. The temporary file is created with
	        ``delete=False`` and is not removed here.
	    """
	    converted_audio = convert_audio_to_text(audio_file)
	    if not converted_audio:
	        return "Error: Could not decode audio file.", None

	    # The chat API takes content parts, not an open file handle, so read
	    # the WAV bytes out and release the handle.
	    try:
	        converted_audio.seek(0)
	        wav_bytes = converted_audio.read()
	    finally:
	        converted_audio.close()

	    # Send the audio as an inline blob part.
	    response = chat_session.send_message(
	        [{"mime_type": "audio/wav", "data": wav_bytes}]
	    )
	    response_text = response.text

	    # Eleven Labs text-to-speech request payload
	    payload = {
	        "text": response_text,
	        "voice_settings": {
	            "stability": 0,
	            "similarity_boost": 0,
	        },
	    }
	    headers = {
	        "xi-api-key": eleven_labs_api_key,
	        "Content-Type": "application/json",
	    }

	    # A timeout keeps a stalled TTS service from hanging the request; any
	    # transport failure degrades to a text-only reply.
	    try:
	        tts_response = requests.post(
	            eleven_labs_url, json=payload, headers=headers, timeout=60
	        )
	    except requests.RequestException:
	        return response_text, None

	    if tts_response.status_code != 200:
	        return response_text, None

	    # Persist the audio so it can be served from a file path.
	    with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	        temp_audio.write(tts_response.content)
	        audio_path = temp_audio.name

	    return response_text, audio_path

	# Create the Gradio UI
	def _audio_reply_as_chat(audio_file, history):
	    """Run the audio handler and append its text reply to the chat history.

	    The Chatbot component displays a list of (user, bot) pairs, while
	    chat_and_tts_audio returns plain text, so the reply is wrapped here.
	    """
	    # Fall back to an empty history if the stored state is not a list.
	    history = list(history) if isinstance(history, list) else []
	    reply_text, audio_path = chat_and_tts_audio(audio_file)
	    # None as the user side: the uploaded audio has no text to show.
	    history.append((None, reply_text))
	    return history, history, audio_path

	with gr.Blocks() as demo:
	    gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
	    gr.Markdown("Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format.")

	    with gr.Row():
	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(label="Chat History")
	            user_input_text = gr.Textbox(placeholder="Type your question...", label="Text Input")
	            submit_btn_text = gr.Button("Send")

	        with gr.Column(scale=2):
	            # "audio" is not a valid File `type`; restrict uploads to audio
	            # via file_types and hand the handler a file path.
	            user_input_audio = gr.File(label="Upload Audio", type="filepath", file_types=["audio"])
	            submit_btn_audio = gr.Button("Send")

	        with gr.Column(scale=1):
	            audio_output = gr.Audio(label="Response Audio", type="filepath")

	    # Per-session list of (user, bot) pairs shown in the chatbot.
	    state = gr.State([])

	    submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
	    submit_btn_audio.click(_audio_reply_as_chat, inputs=[user_input_audio, state], outputs=[chatbot, state, audio_output])

	demo.launch()