Ath committed on
Commit
f58d77b
·
verified ·
1 Parent(s): b0d06ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -3,6 +3,8 @@ import requests
3
  import google.generativeai as genai
4
  import gradio as gr
5
  from tempfile import NamedTemporaryFile
 
 
6
 
7
  # Configure your Google Generative AI API key
8
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@@ -40,14 +42,20 @@ def chat_and_tts_text(user_input, history):
40
 
41
  def convert_audio_to_text(audio_file):
42
  # Function to convert audio to text (you can replace this with your preferred method)
43
- return "Sample text from audio"
 
 
 
 
44
 
45
  def chat_and_tts_audio(audio_file):
46
  # Convert uploaded audio file to text
47
- user_input = convert_audio_to_text(audio_file)
 
 
48
 
49
- # Send the user's audio input to the chat session
50
- response = chat_session.send_message(user_input)
51
  response_text = response.text
52
 
53
  # Eleven Labs text-to-speech request payload
@@ -68,7 +76,7 @@ def chat_and_tts_audio(audio_file):
68
 
69
  # Check if the response is successful and save the audio content to a temporary file
70
  if tts_response.status_code == 200:
71
- with NamedTemporaryFile(delete=False) as temp_audio:
72
  temp_audio.write(tts_response.content)
73
  audio_path = temp_audio.name
74
  else:
@@ -88,7 +96,7 @@ with gr.Blocks() as demo:
88
  submit_btn_text = gr.Button("Send")
89
 
90
  with gr.Column(scale=2):
91
- user_input_audio = gr.File(label="Upload Audio", type="audio", accept=".wav,.mp3,.ogg")
92
  submit_btn_audio = gr.Button("Send")
93
 
94
  with gr.Column(scale=1):
 
3
  import google.generativeai as genai
4
  import gradio as gr
5
  from tempfile import NamedTemporaryFile
6
+ from pydub import AudioSegment
7
+ from pydub.exceptions import CouldntDecodeError
8
 
9
  # Configure your Google Generative AI API key
10
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
42
 
43
  def convert_audio_to_text(audio_file):
44
  # Function to convert audio to text (you can replace this with your preferred method)
45
+ try:
46
+ sound = AudioSegment.from_file(audio_file)
47
+ return sound.export(format="wav") # Export as WAV (adjust format as needed); the result is passed to the chat session, not to a TTS API
48
+ except CouldntDecodeError:
49
+ return None
50
 
51
  def chat_and_tts_audio(audio_file):
52
  # Convert uploaded audio file to text
53
+ converted_audio = convert_audio_to_text(audio_file)
54
+ if not converted_audio:
55
+ return "Error: Could not decode audio file.", None
56
 
57
+ # Send the converted audio to the chat session
58
+ response = chat_session.send_message(converted_audio)
59
  response_text = response.text
60
 
61
  # Eleven Labs text-to-speech request payload
 
76
 
77
  # Check if the response is successful and save the audio content to a temporary file
78
  if tts_response.status_code == 200:
79
+ with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
80
  temp_audio.write(tts_response.content)
81
  audio_path = temp_audio.name
82
  else:
 
96
  submit_btn_text = gr.Button("Send")
97
 
98
  with gr.Column(scale=2):
99
+ user_input_audio = gr.File(label="Upload Audio", type="audio")
100
  submit_btn_audio = gr.Button("Send")
101
 
102
  with gr.Column(scale=1):