shukdevdatta123 committed on
Commit
14868b1
·
verified ·
1 Parent(s): 730c789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -5
app.py CHANGED
@@ -3,6 +3,13 @@ import openai
3
  import base64
4
  from PIL import Image
5
  import io
 
 
 
 
 
 
 
6
 
7
  # Function to send the request to OpenAI API with an image or text input
8
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
@@ -132,7 +139,6 @@ custom_css = """
132
  #submit-btn:active {
133
  transform: scale(0.95);
134
  }
135
- /* Clear History Button: Light Red */
136
  #clear-history {
137
  background-color: #f04e4e; /* Slightly Darker red */
138
  color: white;
@@ -205,7 +211,7 @@ custom_css = """
205
  }
206
  """
207
 
208
- # Gradio interface setup
209
  def create_interface():
210
  with gr.Blocks(css=custom_css) as demo:
211
  gr.Markdown("""
@@ -261,7 +267,72 @@ def create_interface():
261
 
262
  return demo
263
 
264
- # Run the interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  if __name__ == "__main__":
266
- demo = create_interface()
267
- demo.launch()
 
 
 
 
3
  import base64
4
  from PIL import Image
5
  import io
6
+ import os
7
+ import streamlit as st
8
+ from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
9
+ from generate_answer import base_model_chatbot, with_pdf_chatbot
10
+ from audio_recorder_streamlit import audio_recorder
11
+ from streamlit_float import *
12
+ from PIL import Image as stImage
13
 
14
  # Function to send the request to OpenAI API with an image or text input
15
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
 
139
  #submit-btn:active {
140
  transform: scale(0.95);
141
  }
 
142
  #clear-history {
143
  background-color: #f04e4e; /* Slightly Darker red */
144
  color: white;
 
211
  }
212
  """
213
 
214
+ # Gradio interface setup for multimodal chatbot
215
  def create_interface():
216
  with gr.Blocks(css=custom_css) as demo:
217
  gr.Markdown("""
 
267
 
268
  return demo
269
 
270
# Streamlit voice chat app code
def voice_chat():
    """Streamlit voice-interaction chatbot.

    Records microphone audio from a floating footer, transcribes it,
    sends the transcript to the chat model, and plays back a synthesized
    audio reply. Stops the app if no OpenAI API key is provided.
    """
    # Float feature initialization (enables pinning the mic footer).
    float_init()

    # Prompt for API key; the app cannot proceed without one.
    api_key = get_api_key()
    if not api_key:
        st.error("You must provide a valid OpenAI API Key to proceed.")
        st.stop()

    def initialize_session_state():
        # Seed the conversation once per session with a greeting message.
        if "messages" not in st.session_state:
            st.session_state.messages = [
                {"role": "assistant", "content": "Hi! How may I assist you today? (Please Speak Clearly)"}
            ]

    initialize_session_state()

    st.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")

    # Create footer container for the microphone.
    footer_container = st.container()
    with footer_container:
        audio_bytes = audio_recorder()

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    if audio_bytes:
        # Persist the recording so the transcriber can read it from disk.
        # NOTE(review): original name said "webm" but the file is written as
        # .mp3 — confirm which container speech_to_text actually expects.
        with st.spinner("Transcribing..."):
            audio_file_path = "temp_audio.mp3"
            with open(audio_file_path, "wb") as f:
                f.write(audio_bytes)

            transcript = speech_to_text(audio_file_path)
            if transcript:
                st.session_state.messages.append({"role": "user", "content": transcript})
                with st.chat_message("user"):
                    st.write(transcript)
            os.remove(audio_file_path)

    if st.session_state.messages[-1]["role"] != "assistant":
        with st.chat_message("assistant"):
            with st.spinner("Thinking🤔..."):
                final_response = base_model_chatbot(st.session_state.messages)

            # Add a closing sentence when the reply looks truncated.
            # FIX: guard the empty/whitespace-only reply — the original
            # `final_response.strip()[-1]` raised IndexError on "".
            stripped = final_response.strip()
            if not stripped or stripped[-1] not in ".!?":
                final_response += " This is the end of the response. Let me know if you need anything else."

            with st.spinner("Generating audio response..."):
                audio_file = text_to_speech(final_response)
                autoplay_audio(audio_file)
            st.write(final_response)
            st.session_state.messages.append({"role": "assistant", "content": final_response})
            os.remove(audio_file)  # clean up the temporary TTS file

    # Float the footer container and provide CSS to target it with.
    footer_container.float("bottom: 0rem;")
332
+
333
if __name__ == "__main__":
    # FIX: in the original flow demo.launch() blocks until the Gradio server
    # shuts down, so voice_chat() was effectively unreachable — and a
    # Streamlit app must be started with `streamlit run`, not invoked after a
    # Gradio launch. Select exactly one UI per process via APP_MODE; the
    # default ("gradio") preserves the original reachable behavior.
    if os.environ.get("APP_MODE", "gradio").lower() == "streamlit":
        # Streamlit voice chat (run via: APP_MODE=streamlit streamlit run app.py)
        voice_chat()
    else:
        demo = create_interface()  # Gradio multimodal chatbot
        demo.launch()