shukdevdatta123 committed on
Commit
e555f36
·
verified ·
1 Parent(s): c014521

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -39
app.py CHANGED
@@ -1,13 +1,8 @@
1
  import gradio as gr
2
  import openai
3
  import base64
4
- import os
5
  import io
6
- from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
7
- from generate_answer import base_model_chatbot, with_pdf_chatbot
8
- from audio_recorder_streamlit import audio_recorder
9
- from streamlit_float import *
10
- from PIL import Image as stImage
11
 
12
  # Function to send the request to OpenAI API with an image or text input
13
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
@@ -16,28 +11,29 @@ def generate_response(input_text, image, openai_api_key, reasoning_effort="mediu
16
 
17
  openai.api_key = openai_api_key
18
 
19
- # If the user uploaded an image, convert it to base64 and use it for API call
20
  if image:
21
  # Convert the image to base64 string
22
  image_info = get_base64_string_from_image(image)
23
  input_text = f"data:image/png;base64,{image_info}"
24
 
25
- # Check for text input and pass to API
26
- if not input_text:
27
- return "Error: Please provide either text, image, or voice input."
28
-
29
- # Prepare the messages for OpenAI API based on the selected model
30
- if model_choice == "o1" and input_text:
31
- messages = [{"role": "user", "content": input_text}]
32
- elif model_choice == "o3-mini" and input_text:
33
- messages = [{"role": "user", "content": input_text}]
34
-
35
  try:
36
  # Call OpenAI API with the selected model
37
  response = openai.ChatCompletion.create(
38
  model=model_choice, # Dynamically choose the model (o1 or o3-mini)
39
  messages=messages,
40
- max_tokens=2000 # Limit response tokens to 2000
 
41
  )
42
 
43
  return response["choices"][0]["message"]["content"]
@@ -54,11 +50,7 @@ def get_base64_string_from_image(pil_image):
54
  return base64_str
55
 
56
  # The function that will be used by Gradio interface
57
- def chatbot(input_text, image, voice_audio, openai_api_key, reasoning_effort, model_choice, history=[]):
58
- # If voice_audio is provided, convert it to text
59
- if voice_audio:
60
- input_text = speech_to_text(voice_audio) # Convert speech to text
61
-
62
  response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
63
 
64
  # Append the response to the history
@@ -140,6 +132,7 @@ custom_css = """
140
  #submit-btn:active {
141
  transform: scale(0.95);
142
  }
 
143
  #clear-history {
144
  background-color: #f04e4e; /* Slightly Darker red */
145
  color: white;
@@ -212,13 +205,13 @@ custom_css = """
212
  }
213
  """
214
 
215
- # Gradio interface setup for multimodal chatbot with voice functionality
216
  def create_interface():
217
  with gr.Blocks(css=custom_css) as demo:
218
  gr.Markdown("""
219
  <div class="gradio-header">
220
- <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
221
- <h3>Interact with a chatbot using text, image, or voice inputs</h3>
222
  </div>
223
  """)
224
 
@@ -226,10 +219,9 @@ def create_interface():
226
  with gr.Accordion("Click to expand for details", open=False):
227
  gr.Markdown("""
228
  ### Description:
229
- This is a multimodal chatbot that can handle text, image, and voice inputs.
230
  - You can ask questions or provide text, and the assistant will respond.
231
- - You can upload an image, and the assistant will process it and answer questions about the image.
232
- - You can also speak to the assistant, and it will process your speech.
233
  - Enter your OpenAI API key to start interacting with the model.
234
  - You can use the 'Clear History' button to remove the conversation history.
235
  - "o1" is for image chat and "o3-mini" is for text chat.
@@ -258,22 +250,18 @@ def create_interface():
258
  choices=["o1", "o3-mini"],
259
  value="o1" # Default to 'o1' for image-related tasks
260
  )
261
-
262
- # Audio input (voice interaction)
263
- with gr.Row():
264
- voice_input = gr.Audio(label="Speak to the Assistant", type="filepath")
265
-
266
- submit_btn = gr.Button("Ask!", elem_id="submit-btn")
267
- clear_btn = gr.Button("Clear History", elem_id="clear-history")
268
 
269
  chat_history = gr.Chatbot()
270
 
271
  # Button interactions
272
- submit_btn.click(fn=chatbot, inputs=[input_text, image_input, voice_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
273
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
274
 
275
  return demo
276
 
 
277
  if __name__ == "__main__":
278
- demo = create_interface() # Gradio multimodal chatbot
279
- demo.launch()
 
1
  import gradio as gr
2
  import openai
3
  import base64
4
+ from PIL import Image
5
  import io
 
 
 
 
 
6
 
7
  # Function to send the request to OpenAI API with an image or text input
8
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
 
11
 
12
  openai.api_key = openai_api_key
13
 
14
+ # Process the input depending on whether it's text or an image
15
  if image:
16
  # Convert the image to base64 string
17
  image_info = get_base64_string_from_image(image)
18
  input_text = f"data:image/png;base64,{image_info}"
19
 
20
+ # Prepare the messages for OpenAI API
21
+ if model_choice == "o1":
22
+ messages = [
23
+ {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_text}}]}
24
+ ]
25
+ elif model_choice == "o3-mini":
26
+ messages = [
27
+ {"role": "user", "content": [{"type": "text", "text": input_text}]}
28
+ ]
29
+
30
  try:
31
  # Call OpenAI API with the selected model
32
  response = openai.ChatCompletion.create(
33
  model=model_choice, # Dynamically choose the model (o1 or o3-mini)
34
  messages=messages,
35
+ reasoning_effort=reasoning_effort, # Set reasoning_effort for the response
36
+ max_completion_tokens=2000 # Limit response tokens to 2000
37
  )
38
 
39
  return response["choices"][0]["message"]["content"]
 
50
  return base64_str
51
 
52
  # The function that will be used by Gradio interface
53
+ def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=[]):
 
 
 
 
54
  response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
55
 
56
  # Append the response to the history
 
132
  #submit-btn:active {
133
  transform: scale(0.95);
134
  }
135
+ /* Clear History Button: Light Red */
136
  #clear-history {
137
  background-color: #f04e4e; /* Slightly Darker red */
138
  color: white;
 
205
  }
206
  """
207
 
208
+ # Gradio interface setup
209
  def create_interface():
210
  with gr.Blocks(css=custom_css) as demo:
211
  gr.Markdown("""
212
  <div class="gradio-header">
213
+ <h1>Multimodal Chatbot (Text + Image)</h1>
214
+ <h3>Interact with a chatbot using text or image inputs</h3>
215
  </div>
216
  """)
217
 
 
219
  with gr.Accordion("Click to expand for details", open=False):
220
  gr.Markdown("""
221
  ### Description:
222
+ This is a multimodal chatbot that can handle both text and image inputs.
223
  - You can ask questions or provide text, and the assistant will respond.
224
+ - You can also upload an image, and the assistant will process it and answer questions about the image.
 
225
  - Enter your OpenAI API key to start interacting with the model.
226
  - You can use the 'Clear History' button to remove the conversation history.
227
  - "o1" is for image chat and "o3-mini" is for text chat.
 
250
  choices=["o1", "o3-mini"],
251
  value="o1" # Default to 'o1' for image-related tasks
252
  )
253
+ submit_btn = gr.Button("Ask!", elem_id="submit-btn")
254
+ clear_btn = gr.Button("Clear History", elem_id="clear-history")
 
 
 
 
 
255
 
256
  chat_history = gr.Chatbot()
257
 
258
  # Button interactions
259
+ submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
260
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
261
 
262
  return demo
263
 
264
+ # Run the interface
265
  if __name__ == "__main__":
266
+ demo = create_interface()
267
+ demo.launch()