Spaces:

Artificial-superintelligence
/

ATHAI

Running

App Files Files Community

Ath committed on Jun 13, 2024

Commit

b0d06ab

verified ·

1 Parent(s): dc524ae

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -21

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import requests
 import google.generativeai as genai
 import gradio as gr
 # Configure your Google Generative AI API key
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@@ -18,7 +19,7 @@ generation_config = {
 model = genai.GenerativeModel(
     model_name="gemini-1.5-pro",
     generation_config=generation_config,
-    system_instruction="You are an human that lives on earth, and you also speak friendly,amd you also use bro words,and you never say your AI, your name is ath,you 100% human like speak",
 )
 chat_session = model.start_chat(history=[])
@@ -27,12 +28,27 @@ chat_session = model.start_chat(history=[])
 eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
 eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
-def chat_and_tts(user_input, history):
-    # Send the user's input to the chat session
     response = chat_session.send_message(user_input)
     response_text = response.text
-    print("Response from chat session:", response_text)  # Debug response text
     # Eleven Labs text-to-speech request payload
     payload = {
@@ -50,40 +66,37 @@ def chat_and_tts(user_input, history):
     # Make the request to Eleven Labs API
     tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
-    print("TTS API response status code:", tts_response.status_code)  # Debug status code
-    # Check if the response is successful and save the audio content to a file
     if tts_response.status_code == 200:
-        audio_path = 'response_audio.mp3'
-        with open(audio_path, 'wb') as file:
-            file.write(tts_response.content)
-        print("Audio file saved successfully:", audio_path)  # Debug audio file path
     else:
-        print("Failed to generate audio. Status code:", tts_response.status_code)
-        print("Response:", tts_response.text)
         audio_path = None
-    # Update the chat history
-    history.append((user_input, response_text))
-    return history, history, audio_path
 # Create the Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
-    gr.Markdown("Ask any question and get a friendly response from Ath. The response will also be converted to speech.")
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Chat History")
-            user_input = gr.Textbox(placeholder="Ask me anything...", label="Your Question")
-            submit_btn = gr.Button("Send")
         with gr.Column(scale=1):
             audio_output = gr.Audio(label="Response Audio", type="filepath")
     state = gr.State([])
-    submit_btn.click(chat_and_tts, inputs=[user_input, state], outputs=[chatbot, state, audio_output])
 demo.launch()

 import requests
 import google.generativeai as genai
 import gradio as gr
+from tempfile import NamedTemporaryFile
 # Configure your Google Generative AI API key
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 # Gemini model handle; the persona is supplied through system_instruction.
 # generation_config is defined earlier in app.py (outside this hunk).
 model = genai.GenerativeModel(
     model_name="gemini-1.5-pro",
     generation_config=generation_config,
+    system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
 )
 # A single chat session is created at import time; both handlers below send
 # their messages through this same module-level object.
 chat_session = model.start_chat(history=[])
 # Eleven Labs API key (read from the environment) and text-to-speech endpoint.
 # NOTE(review): the last URL path segment is presumably the voice ID — confirm.
 eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
 eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
+def chat_and_tts_text(user_input, history):
+    """Send typed input to the chat session and record the exchange.
+
+    Args:
+        user_input: Text typed by the user.
+        history: List of (user_message, reply) pairs held in gr.State; it is
+            extended in place with the new exchange.
+
+    Returns:
+        A (history, history) tuple: the first item refreshes the chatbot
+        display and the second is stored back into the state.
+    """
+    # Send the user's text input to the chat session
     response = chat_session.send_message(user_input)
     response_text = response.text
+    # Update the chat history with text input and response
+    history.append((user_input, response_text))
+    # The click handler maps the outputs to [chatbot, state]; the state must stay
+    # a list, otherwise the next call's history.append() fails on a string.
+    return history, history
+def convert_audio_to_text(audio_file):
+    # Function to convert audio to text (you can replace this with your preferred method)
     # NOTE: placeholder only — audio_file is never read, and every call returns
     # the same fixed string regardless of the uploaded audio.
+    return "Sample text from audio"
+def chat_and_tts_audio(audio_file):
+    """Answer an uploaded audio question and synthesize the reply as speech.
+
+    The upload is converted to text with convert_audio_to_text, the text is
+    sent to the chat session, and the reply is posted to the Eleven Labs
+    text-to-speech endpoint.
+
+    Args:
+        audio_file: The value delivered by the audio upload component.
+
+    Returns:
+        A (chat_pairs, audio_path) tuple for the [chatbot, audio_output]
+        outputs. chat_pairs is a one-item list holding the
+        (transcript, reply) pair; audio_path is the saved audio file, or
+        None when the TTS request did not return HTTP 200.
+    """
+    # Convert uploaded audio file to text
+    user_input = convert_audio_to_text(audio_file)
+    # Send the user's audio input to the chat session
+    response = chat_session.send_message(user_input)
+    response_text = response.text
     # Eleven Labs text-to-speech request payload
     payload = {
     # Make the request to Eleven Labs API
     tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
+    # Check if the response is successful and save the audio content to a temporary file
     if tts_response.status_code == 200:
+        # Keep the .mp3 extension the previous version used so the saved file
+        # still carries its audio format in the name.
+        with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
+            temp_audio.write(tts_response.content)
+            audio_path = temp_audio.name
     else:
         audio_path = None
+    # The chatbot output takes a list of (user, bot) pairs, not a bare string.
+    return [(user_input, response_text)], audio_path
 # Create the Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
+    gr.Markdown("Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format.")
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Chat History")
+            user_input_text = gr.Textbox(placeholder="Type your question...", label="Text Input")
+            submit_btn_text = gr.Button("Send")
+        with gr.Column(scale=2):
+            # gr.File has no `accept` argument and "audio" is not a valid `type`;
+            # allowed extensions are declared through `file_types` instead.
+            user_input_audio = gr.File(label="Upload Audio", file_types=[".wav", ".mp3", ".ogg"])
+            submit_btn_audio = gr.Button("Send")
         with gr.Column(scale=1):
             audio_output = gr.Audio(label="Response Audio", type="filepath")
     # Per-session chat history, passed into and returned from the text handler.
     state = gr.State([])
+    submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
+    submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio], outputs=[chatbot, audio_output])
 demo.launch()