Ath
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,6 @@ import requests
|
|
3 |
import google.generativeai as genai
|
4 |
import gradio as gr
|
5 |
from tempfile import NamedTemporaryFile
|
6 |
-
from pydub import AudioSegment
|
7 |
-
from pydub.exceptions import CouldntDecodeError
|
8 |
|
9 |
# Configure your Google Generative AI API key
|
10 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
@@ -41,21 +39,16 @@ def chat_and_tts_text(user_input, history):
|
|
41 |
return history, response_text
|
42 |
|
43 |
def convert_audio_to_text(audio_file):
|
44 |
-
#
|
45 |
-
|
46 |
-
|
47 |
-
return sound.export(format="wav") # Export as WAV for TTS API (adjust format as needed)
|
48 |
-
except CouldntDecodeError:
|
49 |
-
return None
|
50 |
|
51 |
def chat_and_tts_audio(audio_file):
|
52 |
# Convert uploaded audio file to text
|
53 |
-
|
54 |
-
if not converted_audio:
|
55 |
-
return "Error: Could not decode audio file.", None
|
56 |
|
57 |
-
# Send the audio
|
58 |
-
response = chat_session.send_message(
|
59 |
response_text = response.text
|
60 |
|
61 |
# Eleven Labs text-to-speech request payload
|
@@ -96,7 +89,7 @@ with gr.Blocks() as demo:
|
|
96 |
submit_btn_text = gr.Button("Send")
|
97 |
|
98 |
with gr.Column(scale=2):
|
99 |
-
user_input_audio = gr.File(label="Upload Audio", type="
|
100 |
submit_btn_audio = gr.Button("Send")
|
101 |
|
102 |
with gr.Column(scale=1):
|
@@ -107,4 +100,4 @@ with gr.Blocks() as demo:
|
|
107 |
submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
|
108 |
submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio], outputs=[chatbot, audio_output])
|
109 |
|
110 |
-
demo.launch()
|
|
|
3 |
import google.generativeai as genai
|
4 |
import gradio as gr
|
5 |
from tempfile import NamedTemporaryFile
|
|
|
|
|
6 |
|
7 |
# Configure your Google Generative AI API key
|
8 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
|
|
39 |
return history, response_text
|
40 |
|
41 |
def convert_audio_to_text(audio_file):
|
42 |
+
# This is a placeholder function. Replace with actual implementation.
|
43 |
+
# For now, we assume the function just returns a dummy text.
|
44 |
+
return "Sample text from audio"
|
|
|
|
|
|
|
45 |
|
46 |
def chat_and_tts_audio(audio_file):
|
47 |
# Convert uploaded audio file to text
|
48 |
+
user_input = convert_audio_to_text(audio_file)
|
|
|
|
|
49 |
|
50 |
+
# Send the user's audio input to the chat session
|
51 |
+
response = chat_session.send_message(user_input)
|
52 |
response_text = response.text
|
53 |
|
54 |
# Eleven Labs text-to-speech request payload
|
|
|
89 |
submit_btn_text = gr.Button("Send")
|
90 |
|
91 |
with gr.Column(scale=2):
|
92 |
+
user_input_audio = gr.File(label="Upload Audio", type="binary")
|
93 |
submit_btn_audio = gr.Button("Send")
|
94 |
|
95 |
with gr.Column(scale=1):
|
|
|
100 |
submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
|
101 |
submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio], outputs=[chatbot, audio_output])
|
102 |
|
103 |
+
demo.launch()
|