shukdevdatta123 committed on
Commit
c4ff6ca
·
verified ·
1 Parent(s): e555f36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -9
app.py CHANGED
@@ -3,6 +3,8 @@ import openai
3
  import base64
4
  from PIL import Image
5
  import io
 
 
6
 
7
  # Function to send the request to OpenAI API with an image or text input
8
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
@@ -49,8 +51,25 @@ def get_base64_string_from_image(pil_image):
49
  base64_str = base64.b64encode(img_bytes).decode("utf-8")
50
  return base64_str
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # The function that will be used by Gradio interface
53
- def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=[]):
 
 
 
 
54
  response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
55
 
56
  # Append the response to the history
@@ -98,7 +117,7 @@ custom_css = """
98
  animation: fadeIn 2s ease-out;
99
  }
100
  /* Input field styles */
101
- .gradio-textbox, .gradio-dropdown, .gradio-image {
102
  border-radius: 8px;
103
  border: 2px solid #ccc;
104
  padding: 10px;
@@ -107,7 +126,7 @@ custom_css = """
107
  font-size: 1rem;
108
  transition: all 0.3s ease;
109
  }
110
- .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus {
111
  border-color: #007bff;
112
  }
113
  /* Button styles */
@@ -132,7 +151,6 @@ custom_css = """
132
  #submit-btn:active {
133
  transform: scale(0.95);
134
  }
135
- /* Clear History Button: Light Red */
136
  #clear-history {
137
  background-color: #f04e4e; /* Slightly Darker red */
138
  color: white;
@@ -195,7 +213,7 @@ custom_css = """
195
  .gradio-chatbot {
196
  max-height: 400px;
197
  }
198
- .gradio-textbox, .gradio-dropdown, .gradio-image {
199
  width: 100%;
200
  }
201
  #submit-btn, #clear-history {
@@ -210,8 +228,8 @@ def create_interface():
210
  with gr.Blocks(css=custom_css) as demo:
211
  gr.Markdown("""
212
  <div class="gradio-header">
213
- <h1>Multimodal Chatbot (Text + Image)</h1>
214
- <h3>Interact with a chatbot using text or image inputs</h3>
215
  </div>
216
  """)
217
 
@@ -219,9 +237,10 @@ def create_interface():
219
  with gr.Accordion("Click to expand for details", open=False):
220
  gr.Markdown("""
221
  ### Description:
222
- This is a multimodal chatbot that can handle both text and image inputs.
223
  - You can ask questions or provide text, and the assistant will respond.
224
  - You can also upload an image, and the assistant will process it and answer questions about the image.
 
225
  - Enter your OpenAI API key to start interacting with the model.
226
  - You can use the 'Clear History' button to remove the conversation history.
227
  - "o1" is for image chat and "o3-mini" is for text chat.
@@ -238,6 +257,7 @@ def create_interface():
238
  with gr.Row():
239
  image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
240
  input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
 
241
 
242
  with gr.Row():
243
  reasoning_effort = gr.Dropdown(
@@ -256,7 +276,7 @@ def create_interface():
256
  chat_history = gr.Chatbot()
257
 
258
  # Button interactions
259
- submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
260
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
261
 
262
  return demo
 
3
  import base64
4
  from PIL import Image
5
  import io
6
+ import openai
7
+ import os
8
 
9
  # Function to send the request to OpenAI API with an image or text input
10
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
 
51
  base64_str = base64.b64encode(img_bytes).decode("utf-8")
52
  return base64_str
53
 
54
# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio, openai_api_key):
    """Transcribe an audio file to text via OpenAI's Whisper API.

    Parameters:
        audio: Path to the uploaded/recorded audio file (as provided by the
            Gradio Audio component), or an already-open binary file object.
        openai_api_key: The caller's OpenAI API key.

    Returns:
        The transcribed text on success, or an "Error: ..." string on
        failure — callers treat the return value as plain text either way.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key
    try:
        # NOTE(review): the pre-1.0 openai SDK exposes Whisper through
        # Audio.transcribe (there is no Audio.create), and it needs an open
        # binary file object rather than a bare path string.
        if isinstance(audio, str):
            with open(audio, "rb") as audio_file:
                result = openai.Audio.transcribe(model="whisper-1", file=audio_file)
        else:
            result = openai.Audio.transcribe(model="whisper-1", file=audio)
        return result["text"]
    except Exception as e:
        # Surface the failure as chat text so the UI flow doesn't crash.
        return f"Error transcribing audio: {str(e)}"
66
+
67
  # The function that will be used by Gradio interface
68
+ def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, history=[]):
69
+ # If there's audio, transcribe it to text
70
+ if audio:
71
+ input_text = transcribe_audio(audio, openai_api_key)
72
+
73
  response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
74
 
75
  # Append the response to the history
 
117
  animation: fadeIn 2s ease-out;
118
  }
119
  /* Input field styles */
120
+ .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio {
121
  border-radius: 8px;
122
  border: 2px solid #ccc;
123
  padding: 10px;
 
126
  font-size: 1rem;
127
  transition: all 0.3s ease;
128
  }
129
+ .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus {
130
  border-color: #007bff;
131
  }
132
  /* Button styles */
 
151
  #submit-btn:active {
152
  transform: scale(0.95);
153
  }
 
154
  #clear-history {
155
  background-color: #f04e4e; /* Slightly Darker red */
156
  color: white;
 
213
  .gradio-chatbot {
214
  max-height: 400px;
215
  }
216
+ .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio {
217
  width: 100%;
218
  }
219
  #submit-btn, #clear-history {
 
228
  with gr.Blocks(css=custom_css) as demo:
229
  gr.Markdown("""
230
  <div class="gradio-header">
231
+ <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
232
+ <h3>Interact with a chatbot using text, image, or voice inputs</h3>
233
  </div>
234
  """)
235
 
 
237
  with gr.Accordion("Click to expand for details", open=False):
238
  gr.Markdown("""
239
  ### Description:
240
+ This is a multimodal chatbot that can handle text, image, and voice inputs.
241
  - You can ask questions or provide text, and the assistant will respond.
242
  - You can also upload an image, and the assistant will process it and answer questions about the image.
243
+ - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
244
  - Enter your OpenAI API key to start interacting with the model.
245
  - You can use the 'Clear History' button to remove the conversation history.
246
  - "o1" is for image chat and "o3-mini" is for text chat.
 
257
  with gr.Row():
258
  image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
259
  input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
260
+ audio_input = gr.Audio(label="Upload or Record Audio", type="file") # Audio upload or record input
261
 
262
  with gr.Row():
263
  reasoning_effort = gr.Dropdown(
 
276
  chat_history = gr.Chatbot()
277
 
278
  # Button interactions
279
+ submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
280
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
281
 
282
  return demo