Ath committed on
Commit
b0d06ab
·
verified ·
1 Parent(s): dc524ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -21
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import requests
3
  import google.generativeai as genai
4
  import gradio as gr
 
5
 
6
  # Configure your Google Generative AI API key
7
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@@ -18,7 +19,7 @@ generation_config = {
18
  model = genai.GenerativeModel(
19
  model_name="gemini-1.5-pro",
20
  generation_config=generation_config,
21
- system_instruction="You are an human that lives on earth, and you also speak friendly,amd you also use bro words,and you never say your AI, your name is ath,you 100% human like speak",
22
  )
23
 
24
  chat_session = model.start_chat(history=[])
@@ -27,12 +28,27 @@ chat_session = model.start_chat(history=[])
27
  eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
28
  eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
29
 
30
- def chat_and_tts(user_input, history):
31
- # Send the user's input to the chat session
32
  response = chat_session.send_message(user_input)
33
  response_text = response.text
34
 
35
- print("Response from chat session:", response_text) # Debug response text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Eleven Labs text-to-speech request payload
38
  payload = {
@@ -50,40 +66,37 @@ def chat_and_tts(user_input, history):
50
  # Make the request to Eleven Labs API
51
  tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
52
 
53
- print("TTS API response status code:", tts_response.status_code) # Debug status code
54
-
55
- # Check if the response is successful and save the audio content to a file
56
  if tts_response.status_code == 200:
57
- audio_path = 'response_audio.mp3'
58
- with open(audio_path, 'wb') as file:
59
- file.write(tts_response.content)
60
- print("Audio file saved successfully:", audio_path) # Debug audio file path
61
  else:
62
- print("Failed to generate audio. Status code:", tts_response.status_code)
63
- print("Response:", tts_response.text)
64
  audio_path = None
65
 
66
- # Update the chat history
67
- history.append((user_input, response_text))
68
-
69
- return history, history, audio_path
70
 
71
  # Create the Gradio UI
72
  with gr.Blocks() as demo:
73
  gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
74
- gr.Markdown("Ask any question and get a friendly response from Ath. The response will also be converted to speech.")
75
 
76
  with gr.Row():
77
  with gr.Column(scale=2):
78
  chatbot = gr.Chatbot(label="Chat History")
79
- user_input = gr.Textbox(placeholder="Ask me anything...", label="Your Question")
80
- submit_btn = gr.Button("Send")
 
 
 
 
81
 
82
  with gr.Column(scale=1):
83
  audio_output = gr.Audio(label="Response Audio", type="filepath")
84
 
85
  state = gr.State([])
86
 
87
- submit_btn.click(chat_and_tts, inputs=[user_input, state], outputs=[chatbot, state, audio_output])
 
88
 
89
  demo.launch()
 
2
  import requests
3
  import google.generativeai as genai
4
  import gradio as gr
5
+ from tempfile import NamedTemporaryFile
6
 
7
  # Configure your Google Generative AI API key
8
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
19
  model = genai.GenerativeModel(
20
  model_name="gemini-1.5-pro",
21
  generation_config=generation_config,
22
+ system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
23
  )
24
 
25
  chat_session = model.start_chat(history=[])
 
28
  eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
29
  eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
30
 
31
+ def chat_and_tts_text(user_input, history):
32
+ # Send the user's text input to the chat session
33
  response = chat_session.send_message(user_input)
34
  response_text = response.text
35
 
36
+ # Update the chat history with text input and response
37
+ history.append((user_input, response_text))
38
+
39
+ return history, response_text
40
+
41
+ def convert_audio_to_text(audio_file):
42
+ # Function to convert audio to text (you can replace this with your preferred method)
43
+ return "Sample text from audio"
44
+
45
+ def chat_and_tts_audio(audio_file):
46
+ # Convert uploaded audio file to text
47
+ user_input = convert_audio_to_text(audio_file)
48
+
49
+ # Send the user's audio input to the chat session
50
+ response = chat_session.send_message(user_input)
51
+ response_text = response.text
52
 
53
  # Eleven Labs text-to-speech request payload
54
  payload = {
 
66
  # Make the request to Eleven Labs API
67
  tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
68
 
69
+ # Check if the response is successful and save the audio content to a temporary file
 
 
70
  if tts_response.status_code == 200:
71
+ with NamedTemporaryFile(delete=False) as temp_audio:
72
+ temp_audio.write(tts_response.content)
73
+ audio_path = temp_audio.name
 
74
  else:
 
 
75
  audio_path = None
76
 
77
+ return response_text, audio_path
 
 
 
78
 
79
  # Create the Gradio UI
80
  with gr.Blocks() as demo:
81
  gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
82
+ gr.Markdown("Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format.")
83
 
84
  with gr.Row():
85
  with gr.Column(scale=2):
86
  chatbot = gr.Chatbot(label="Chat History")
87
+ user_input_text = gr.Textbox(placeholder="Type your question...", label="Text Input")
88
+ submit_btn_text = gr.Button("Send")
89
+
90
+ with gr.Column(scale=2):
91
+ user_input_audio = gr.File(label="Upload Audio", type="audio", accept=".wav,.mp3,.ogg")
92
+ submit_btn_audio = gr.Button("Send")
93
 
94
  with gr.Column(scale=1):
95
  audio_output = gr.Audio(label="Response Audio", type="filepath")
96
 
97
  state = gr.State([])
98
 
99
+ submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
100
+ submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio], outputs=[chatbot, audio_output])
101
 
102
  demo.launch()