Ath
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import requests
|
3 |
import google.generativeai as genai
|
4 |
import gradio as gr
|
|
|
5 |
|
6 |
# Configure your Google Generative AI API key
|
7 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
@@ -18,7 +19,7 @@ generation_config = {
|
|
18 |
model = genai.GenerativeModel(
|
19 |
model_name="gemini-1.5-pro",
|
20 |
generation_config=generation_config,
|
21 |
-
system_instruction="You are
|
22 |
)
|
23 |
|
24 |
chat_session = model.start_chat(history=[])
|
@@ -27,12 +28,27 @@ chat_session = model.start_chat(history=[])
|
|
27 |
eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
|
28 |
eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
|
29 |
|
30 |
-
def
|
31 |
-
# Send the user's input to the chat session
|
32 |
response = chat_session.send_message(user_input)
|
33 |
response_text = response.text
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
# Eleven Labs text-to-speech request payload
|
38 |
payload = {
|
@@ -50,40 +66,37 @@ def chat_and_tts(user_input, history):
|
|
50 |
# Make the request to Eleven Labs API
|
51 |
tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
# Check if the response is successful and save the audio content to a file
|
56 |
if tts_response.status_code == 200:
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
print("Audio file saved successfully:", audio_path) # Debug audio file path
|
61 |
else:
|
62 |
-
print("Failed to generate audio. Status code:", tts_response.status_code)
|
63 |
-
print("Response:", tts_response.text)
|
64 |
audio_path = None
|
65 |
|
66 |
-
|
67 |
-
history.append((user_input, response_text))
|
68 |
-
|
69 |
-
return history, history, audio_path
|
70 |
|
71 |
# Create the Gradio UI
|
72 |
with gr.Blocks() as demo:
|
73 |
gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
|
74 |
-
gr.Markdown("Ask any question
|
75 |
|
76 |
with gr.Row():
|
77 |
with gr.Column(scale=2):
|
78 |
chatbot = gr.Chatbot(label="Chat History")
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
81 |
|
82 |
with gr.Column(scale=1):
|
83 |
audio_output = gr.Audio(label="Response Audio", type="filepath")
|
84 |
|
85 |
state = gr.State([])
|
86 |
|
87 |
-
|
|
|
88 |
|
89 |
demo.launch()
|
|
|
2 |
import requests
|
3 |
import google.generativeai as genai
|
4 |
import gradio as gr
|
5 |
+
from tempfile import NamedTemporaryFile
|
6 |
|
7 |
# Configure your Google Generative AI API key
|
8 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
|
|
19 |
model = genai.GenerativeModel(
|
20 |
model_name="gemini-1.5-pro",
|
21 |
generation_config=generation_config,
|
22 |
+
system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
|
23 |
)
|
24 |
|
25 |
chat_session = model.start_chat(history=[])
|
|
|
28 |
eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
|
29 |
eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
|
30 |
|
31 |
+
def chat_and_tts_text(user_input, history):
    """Send a typed message to the chat session and record the turn.

    Args:
        user_input: Text typed by the user.
        history: List of ``(user_message, reply)`` tuples kept in the Gradio
            state. It is extended in place; ``None`` is treated as empty.

    Returns:
        A ``(history, history)`` pair. The click handler for the text button
        maps the two return values to the chatbot and to the state, and both
        of them need the history list. Returning the reply string in the
        second slot overwrote the state with a ``str`` and broke the next
        ``history.append`` call.
    """
    if history is None:
        history = []

    # Nothing to send: keep the conversation unchanged rather than posting
    # an empty prompt to the model.
    if not user_input or not user_input.strip():
        return history, history

    # Send the user's text input to the chat session
    response = chat_session.send_message(user_input)
    response_text = response.text

    # Update the chat history with text input and response
    history.append((user_input, response_text))

    return history, history
|
40 |
+
|
41 |
+
def convert_audio_to_text(audio_file):
    """Placeholder speech-to-text hook.

    The argument is not inspected: a fixed sample string is returned for any
    input. Replace the body with a real transcription method when one is
    available.
    """
    placeholder_transcript = "Sample text from audio"
    return placeholder_transcript
|
44 |
+
|
45 |
+
def chat_and_tts_audio(audio_file):
|
46 |
+
# Convert uploaded audio file to text
|
47 |
+
user_input = convert_audio_to_text(audio_file)
|
48 |
+
|
49 |
+
# Send the user's audio input to the chat session
|
50 |
+
response = chat_session.send_message(user_input)
|
51 |
+
response_text = response.text
|
52 |
|
53 |
# Eleven Labs text-to-speech request payload
|
54 |
payload = {
|
|
|
66 |
# Make the request to Eleven Labs API
|
67 |
tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
|
68 |
|
69 |
+
# Check if the response is successful and save the audio content to a temporary file
|
|
|
|
|
70 |
if tts_response.status_code == 200:
|
71 |
+
with NamedTemporaryFile(delete=False) as temp_audio:
|
72 |
+
temp_audio.write(tts_response.content)
|
73 |
+
audio_path = temp_audio.name
|
|
|
74 |
else:
|
|
|
|
|
75 |
audio_path = None
|
76 |
|
77 |
+
return response_text, audio_path
|
|
|
|
|
|
|
78 |
|
79 |
def _handle_text_submit(user_input, history):
    """Run a typed turn and return the history for the chatbot and the state."""
    # Only the first return value (the updated history) is used, so the
    # state always stays a list of (user, reply) tuples.
    updated_history = chat_and_tts_text(user_input, history)[0]
    return updated_history, updated_history


def _handle_audio_submit(audio_file, history):
    """Run an uploaded-audio turn.

    Returns the updated history twice (for the chatbot and the state) and the
    path of the generated reply audio, or ``None`` when no audio was produced.
    """
    history = list(history or [])

    # No file uploaded: nothing to transcribe or send.
    if audio_file is None:
        return history, history, None

    response_text, audio_path = chat_and_tts_audio(audio_file)
    # The chatbot expects a list of (user, reply) pairs, not a bare string.
    # A None user side shows only the reply bubble for this turn.
    history.append((None, response_text))
    return history, history, audio_path


# Create the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
    gr.Markdown("Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format.")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            user_input_text = gr.Textbox(placeholder="Type your question...", label="Text Input")
            submit_btn_text = gr.Button("Send")

        with gr.Column(scale=2):
            # gr.File takes `file_types` to restrict uploads; it has no
            # `accept` keyword and "audio" is not a valid `type` value.
            user_input_audio = gr.File(label="Upload Audio", file_types=[".wav", ".mp3", ".ogg"])
            submit_btn_audio = gr.Button("Send")

        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    state = gr.State([])

    submit_btn_text.click(_handle_text_submit, inputs=[user_input_text, state], outputs=[chatbot, state])
    submit_btn_audio.click(
        _handle_audio_submit,
        inputs=[user_input_audio, state],
        outputs=[chatbot, state, audio_output],
    )

demo.launch()
|