Spaces:

Artificial-superintelligence
/

ATHAI

Running

App Files Community

Ath committed on Jun 13, 2024

Commit

f58d77b

verified ·

1 Parent(s): b0d06ab

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -6

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import requests
 import google.generativeai as genai
 import gradio as gr
 from tempfile import NamedTemporaryFile
 # Configure your Google Generative AI API key
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@@ -40,14 +42,20 @@ def chat_and_tts_text(user_input, history):
 def convert_audio_to_text(audio_file):
     # Function to convert audio to text (you can replace this with your preferred method)
-    return "Sample text from audio"
 def chat_and_tts_audio(audio_file):
     # Convert uploaded audio file to text
-    user_input = convert_audio_to_text(audio_file)
-    # Send the user's audio input to the chat session
-    response = chat_session.send_message(user_input)
     response_text = response.text
     # Eleven Labs text-to-speech request payload
@@ -68,7 +76,7 @@ def chat_and_tts_audio(audio_file):
     # Check if the response is successful and save the audio content to a temporary file
     if tts_response.status_code == 200:
-        with NamedTemporaryFile(delete=False) as temp_audio:
             temp_audio.write(tts_response.content)
             audio_path = temp_audio.name
     else:
@@ -88,7 +96,7 @@ with gr.Blocks() as demo:
             submit_btn_text = gr.Button("Send")
         with gr.Column(scale=2):
-            user_input_audio = gr.File(label="Upload Audio", type="audio", accept=".wav,.mp3,.ogg")
             submit_btn_audio = gr.Button("Send")
         with gr.Column(scale=1):

 import google.generativeai as genai
 import gradio as gr
 from tempfile import NamedTemporaryFile
+from pydub import AudioSegment
+from pydub.exceptions import CouldntDecodeError
 # Configure your Google Generative AI API key
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 def convert_audio_to_text(audio_file):
     # Function to convert audio to text (you can replace this with your preferred method)
+    try:
+        sound = AudioSegment.from_file(audio_file)
+        return sound.export(format="wav")  # Export as WAV for TTS API (adjust format as needed)
+    except CouldntDecodeError:
+        return None
 def chat_and_tts_audio(audio_file):
     # Convert uploaded audio file to text
+    converted_audio = convert_audio_to_text(audio_file)
+    if not converted_audio:
+        return "Error: Could not decode audio file.", None
+    # Send the audio text to the chat session
+    response = chat_session.send_message(converted_audio)
     response_text = response.text
     # Eleven Labs text-to-speech request payload
     # Check if the response is successful and save the audio content to a temporary file
     if tts_response.status_code == 200:
+        with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
             temp_audio.write(tts_response.content)
             audio_path = temp_audio.name
     else:
             submit_btn_text = gr.Button("Send")
         with gr.Column(scale=2):
+            user_input_audio = gr.File(label="Upload Audio", type="audio")
             submit_btn_audio = gr.Button("Send")
         with gr.Column(scale=1):