Ath committed on
Commit
23ea754
·
verified ·
1 Parent(s): f58d77b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -3,8 +3,6 @@ import requests
3
  import google.generativeai as genai
4
  import gradio as gr
5
  from tempfile import NamedTemporaryFile
6
- from pydub import AudioSegment
7
- from pydub.exceptions import CouldntDecodeError
8
 
9
  # Configure your Google Generative AI API key
10
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@@ -41,21 +39,16 @@ def chat_and_tts_text(user_input, history):
41
  return history, response_text
42
 
43
  def convert_audio_to_text(audio_file):
44
- # Function to convert audio to text (you can replace this with your preferred method)
45
- try:
46
- sound = AudioSegment.from_file(audio_file)
47
- return sound.export(format="wav") # Export as WAV for TTS API (adjust format as needed)
48
- except CouldntDecodeError:
49
- return None
50
 
51
  def chat_and_tts_audio(audio_file):
52
  # Convert uploaded audio file to text
53
- converted_audio = convert_audio_to_text(audio_file)
54
- if not converted_audio:
55
- return "Error: Could not decode audio file.", None
56
 
57
- # Send the audio text to the chat session
58
- response = chat_session.send_message(converted_audio)
59
  response_text = response.text
60
 
61
  # Eleven Labs text-to-speech request payload
@@ -96,7 +89,7 @@ with gr.Blocks() as demo:
96
  submit_btn_text = gr.Button("Send")
97
 
98
  with gr.Column(scale=2):
99
- user_input_audio = gr.File(label="Upload Audio", type="audio")
100
  submit_btn_audio = gr.Button("Send")
101
 
102
  with gr.Column(scale=1):
@@ -107,4 +100,4 @@ with gr.Blocks() as demo:
107
  submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
108
  submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio], outputs=[chatbot, audio_output])
109
 
110
- demo.launch()
 
3
  import google.generativeai as genai
4
  import gradio as gr
5
  from tempfile import NamedTemporaryFile
 
 
6
 
7
  # Configure your Google Generative AI API key
8
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
39
  return history, response_text
40
 
41
  def convert_audio_to_text(audio_file):
42
+ # This is a placeholder function. Replace with actual implementation.
43
+ # For now, we assume the function just returns a dummy text.
44
+ return "Sample text from audio"
 
 
 
45
 
46
  def chat_and_tts_audio(audio_file):
47
  # Convert uploaded audio file to text
48
+ user_input = convert_audio_to_text(audio_file)
 
 
49
 
50
+ # Send the user's audio input to the chat session
51
+ response = chat_session.send_message(user_input)
52
  response_text = response.text
53
 
54
  # Eleven Labs text-to-speech request payload
 
89
  submit_btn_text = gr.Button("Send")
90
 
91
  with gr.Column(scale=2):
92
+ user_input_audio = gr.File(label="Upload Audio", type="binary")
93
  submit_btn_audio = gr.Button("Send")
94
 
95
  with gr.Column(scale=1):
 
100
  submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
101
  submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio], outputs=[chatbot, audio_output])
102
 
103
+ demo.launch()