shukdevdatta123 committed on
Commit
511c89a
·
verified ·
1 Parent(s): 7661e71

Create abc3.txt

Browse files
Files changed (1) hide show
  1. abc3.txt +434 -0
abc3.txt ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import base64
4
+ from PIL import Image
5
+ import io
6
+ import os
7
+ import tempfile
8
+ import fitz # PyMuPDF for PDF handling
9
+
10
+ # Function to extract text from PDF files
11
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Path to the PDF (``str`` or ``os.PathLike``), or an object
            that exposes the path through a ``name`` attribute (for example
            a temporary-file wrapper produced by an upload widget).

    Returns:
        The extracted text, or a message starting with
        ``"Error extracting text from PDF:"`` when the file cannot be read.
        Errors are reported as strings, never raised.
    """
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = pdf_file
        else:
            # Upload wrappers carry the on-disk location in ``.name``.
            pdf_path = getattr(pdf_file, "name", pdf_file)

        pdf_document = fitz.open(pdf_path)
        try:
            # join() avoids quadratic string building on long documents.
            return "".join(page.get_text() for page in pdf_document)
        finally:
            # Release the file handle even if a page fails to parse.
            pdf_document.close()
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
24
+
25
+ # Function to send the request to OpenAI API with an image, text or PDF input
26
def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    """Send one user turn (text, image and/or PDF text) to the OpenAI chat API.

    Args:
        input_text: The user's question or message; may be empty or ``None``.
        image: An image object accepted by ``get_base64_string_from_image``,
            or ``None``. Only sent when ``model_choice`` is ``"o1"``.
        pdf_content: Previously extracted PDF text; may be empty.
        openai_api_key: API key used for the request.
        reasoning_effort: Value forwarded as ``reasoning_effort``.
        model_choice: ``"o1"`` or ``"o3-mini"``.

    Returns:
        The assistant's reply, or a string starting with ``"Error"`` that
        describes why no reply could be produced. Errors are never raised.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    if model_choice not in ("o1", "o3-mini"):
        # An unknown model used to leave ``messages`` unbound and crash.
        return f"Error: Unsupported model '{model_choice}'."

    # A question about an uploaded PDF takes priority over everything else.
    is_pdf_query = bool(pdf_content and input_text)
    if is_pdf_query:
        text_part = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
    else:
        text_part = input_text

    # Only "o1" is given images; the base64 payload must never be sent as text.
    send_image = image is not None and model_choice == "o1" and not is_pdf_query

    content = []
    if text_part:
        content.append({"type": "text", "text": text_part})
    if send_image:
        image_info = get_base64_string_from_image(image)
        # Keep the question (if any) alongside the image instead of dropping it.
        content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_info}"}})

    if not content:
        if image is not None:
            return "Error: The selected model does not support image input. Please choose 'o1'."
        return "Error: No input provided. Please enter a question or upload an image."

    messages = [{"role": "user", "content": content}]

    openai.api_key = openai_api_key

    try:
        # NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0 SDK
        # interface -- confirm the pinned ``openai`` version still provides it.
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=messages,
            reasoning_effort=reasoning_effort,
            max_completion_tokens=2000,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
72
+
73
+ # Function to convert an uploaded image to a base64 string
74
def get_base64_string_from_image(pil_image):
    """Encode an image as the base64 text of its PNG serialization.

    Args:
        pil_image: Object with a ``save(file_obj, format=...)`` method, such
            as a PIL image.

    Returns:
        The PNG bytes encoded as a UTF-8 base64 string (no data-URL prefix).
    """
    png_buffer = io.BytesIO()
    pil_image.save(png_buffer, format="PNG")
    return base64.b64encode(png_buffer.getvalue()).decode("utf-8")
81
+
82
+ # Function to transcribe audio to text using OpenAI Whisper API
83
def transcribe_audio(audio, openai_api_key):
    """Transcribe an audio file to text with OpenAI's ``whisper-1`` model.

    Args:
        audio: Filesystem path of the audio file.
        openai_api_key: API key used for the request.

    Returns:
        The transcribed text, ``"Error: No API key provided."`` when the key
        is missing, or a message starting with ``"Error transcribing audio:"``
        when reading or transcribing fails. Errors are never raised.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    try:
        openai.api_key = openai_api_key

        with open(audio, "rb") as audio_file:
            audio_bytes = audio_file.read()

        audio_file_obj = io.BytesIO(audio_bytes)
        # The upload needs a file name. Keep the real one so its extension
        # matches the data (presumably the service uses it to detect the
        # format -- a hard-coded ".wav" mislabels mp3/webm/m4a uploads).
        audio_file_obj.name = os.path.basename(audio) or "audio.wav"

        transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
        return transcription["text"]
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
103
+
104
+ # The function that will be used by Gradio interface
105
def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, history=[]):
    """Handle one submission from the UI and return the refreshed widget values.

    Args:
        input_text: Text typed by the user (replaced by the transcription
            when ``audio`` is given).
        image: Uploaded image or ``None``.
        audio: Path of an uploaded/recorded audio file, or a falsy value.
        pdf_file: Newly uploaded PDF, or ``None``.
        openai_api_key: API key forwarded to the OpenAI helpers.
        reasoning_effort: Forwarded to ``generate_response``.
        model_choice: Forwarded to ``generate_response``.
        pdf_content: PDF text remembered from an earlier upload.
        history: List of ``(user, assistant)`` pairs to append to.
            NOTE(review): the default list is a single object shared by every
            call. It is kept because, when the caller does not pass
            ``history``, that shared list is what accumulates the
            conversation. Callers should pass their own list to get isolated
            histories.

    Returns:
        ``("", None, None, None, pdf_text, history)`` -- cleared values for the
        text, image, audio and file inputs, the PDF text to remember, and the
        updated history.
    """
    new_pdf_content = pdf_content
    response = None

    # If there's audio, transcribe it to text
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)
        # transcribe_audio reports failures as strings; show them to the user
        # instead of sending the error message to the model as a question.
        if isinstance(input_text, str) and input_text.startswith(
            ("Error transcribing audio:", "Error: No API key provided.")
        ):
            response = input_text
            input_text = "[Audio input]"

    # If a new PDF is uploaded, extract its text
    if pdf_file is not None:
        extracted = extract_text_from_pdf(pdf_file)
        if isinstance(extracted, str) and extracted.startswith("Error extracting text from PDF:"):
            # Keep the previously stored document rather than remembering an
            # error message as if it were the PDF's content.
            if response is None:
                response = extracted
        else:
            new_pdf_content = extracted

    # Generate the response unless an earlier step already failed
    if response is None:
        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)

    # Append the exchange to the history
    user_label = f"User: {input_text}" if input_text else "User: [Uploaded content]"
    history.append((user_label, f"Assistant: {response}"))

    return "", None, None, None, new_pdf_content, history
125
+
126
+ # Function to clear the chat history and PDF content
127
def clear_history():
    """Return reset values for every widget touched by a submission.

    Returns:
        ``("", None, None, None, "", [])`` -- empty text box, no image, no
        audio, no PDF file, empty stored PDF text and an empty chat history
        (a new list on every call).
    """
    return "", None, None, None, "", []
129
+
130
+ # Function to process a newly uploaded PDF
131
def process_pdf(pdf_file):
    """Return the text of an uploaded PDF, or ``""`` when no file is given.

    Args:
        pdf_file: The uploaded PDF as accepted by ``extract_text_from_pdf``,
            or ``None``.

    Returns:
        Whatever ``extract_text_from_pdf`` returns for the file, or an empty
        string when ``pdf_file`` is ``None``.
    """
    return "" if pdf_file is None else extract_text_from_pdf(pdf_file)
135
+
136
+ # Function to update visible components based on input type selection
137
def update_input_type(choice):
    """Return visibility updates for the four input widgets.

    Args:
        choice: Selected input type: ``"Text"``, ``"Image"``, ``"Voice"`` or
            ``"PDF"``.

    Returns:
        A 4-tuple of ``gr.update(visible=...)`` objects, in the order
        (text box, image input, audio input, PDF input).
    """
    # Visibility flags per choice: (text, image, audio, pdf).
    visibility = {
        "Text": (True, False, False, False),
        "Image": (True, True, False, False),
        "Voice": (False, False, True, False),
        "PDF": (True, False, False, True),
    }
    # An unrecognised choice used to return None, which cannot populate four
    # outputs; fall back to the text-only layout instead.
    flags = visibility.get(choice, visibility["Text"])
    return tuple(gr.update(visible=flag) for flag in flags)
146
+
147
+ # Custom CSS styles with animations and button colors
148
# Stylesheet handed to ``gr.Blocks(css=...)``. The id selectors (#submit-btn,
# #clear-history) match the ``elem_id`` values given to the buttons, and
# .gradio-header matches the <div> emitted by the header Markdown.
custom_css = """
/* General body styles */
.gradio-container {
    font-family: 'Arial', sans-serif;
    background-color: #f8f9fa;
    color: #333;
}
/* Header styles */
.gradio-header {
    background-color: #007bff;
    color: white;
    padding: 20px;
    text-align: center;
    border-radius: 8px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    animation: fadeIn 1s ease-out;
}
.gradio-header h1 {
    font-size: 2.5rem;
}
.gradio-header h3 {
    font-size: 1.2rem;
    margin-top: 10px;
}
/* Chatbot container styles */
.gradio-chatbot {
    background-color: #fff;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    max-height: 500px;
    overflow-y: auto;
    animation: fadeIn 2s ease-out;
}
/* Input field styles */
.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file {
    border-radius: 8px;
    border: 2px solid #ccc;
    padding: 10px;
    margin-bottom: 10px;
    width: 100%;
    font-size: 1rem;
    transition: all 0.3s ease;
}
.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus {
    border-color: #007bff;
}
/* Button styles */
/* Send Button: Sky Blue */
#submit-btn {
    background-color: #00aaff; /* Sky blue */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 19px;
    font-size: 1.1rem;
    cursor: pointer;
    transition: all 0.3s ease;
    margin-left: auto;
    margin-right: auto;
    display: block;
    margin-top: 10px;
}
#submit-btn:hover {
    background-color: #0099cc; /* Slightly darker blue */
}
#submit-btn:active {
    transform: scale(0.95);
}
#clear-history {
    background-color: #f04e4e; /* Slightly Darker red */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 13px;
    font-size: 1.1rem;
    cursor: pointer;
    transition: all 0.3s ease;
    margin-top: 10px;
}
#clear-history:hover {
    background-color: #f5a4a4; /* Light red */
}
#clear-history:active {
    transform: scale(0.95);
}
/* Input type selector buttons */
#input-type-group {
    display: flex;
    justify-content: center;
    gap: 10px;
    margin-bottom: 20px;
}
.input-type-btn {
    background-color: #6c757d;
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 15px;
    font-size: 1rem;
    cursor: pointer;
    transition: all 0.3s ease;
}
.input-type-btn.selected {
    background-color: #007bff;
}
.input-type-btn:hover {
    background-color: #5a6268;
}
/* Chat history styles */
.gradio-chatbot .message {
    margin-bottom: 10px;
}
.gradio-chatbot .user {
    background-color: #007bff;
    color: white;
    padding: 10px;
    border-radius: 12px;
    max-width: 70%;
    animation: slideInUser 0.5s ease-out;
}
.gradio-chatbot .assistant {
    background-color: #f1f1f1;
    color: #333;
    padding: 10px;
    border-radius: 12px;
    max-width: 70%;
    margin-left: auto;
    animation: slideInAssistant 0.5s ease-out;
}
/* Animation keyframes */
@keyframes fadeIn {
    0% { opacity: 0; }
    100% { opacity: 1; }
}
@keyframes slideInUser {
    0% { transform: translateX(-100%); }
    100% { transform: translateX(0); }
}
@keyframes slideInAssistant {
    0% { transform: translateX(100%); }
    100% { transform: translateX(0); }
}
/* Mobile responsiveness */
@media (max-width: 768px) {
    .gradio-header h1 {
        font-size: 1.8rem;
    }
    .gradio-header h3 {
        font-size: 1rem;
    }
    .gradio-chatbot {
        max-height: 400px;
    }
    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file {
        width: 100%;
    }
    #submit-btn, #clear-history {
        width: 100%;
        margin-left: 0;
    }
}
"""
311
+
312
+ # Gradio interface setup
313
def create_interface():
    """Build the Gradio Blocks application and wire up its events.

    Layout: header, collapsible description, API-key box, input-type radio,
    the four input widgets (only the text box is visible initially), the
    reasoning-effort and model dropdowns, the two buttons and the chat panel.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
        <div class="gradio-header">
        <h1>Multimodal Chatbot (Text + Image + Voice + PDF)</h1>
        <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
        </div>
        """)

        # Add a description with an expandable accordion
        with gr.Accordion("Click to expand for details", open=False):
            gr.Markdown("""
            ### Description:
            This is a multimodal chatbot that can handle text, image, voice, and PDF inputs.
            - You can ask questions or provide text, and the assistant will respond.
            - You can upload an image, and the assistant will process it and answer questions about the image.
            - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
            - PDF support: Upload a PDF and ask questions about its content.
            - Enter your OpenAI API key to start interacting with the model.
            - You can use the 'Clear History' button to remove the conversation history.
            - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
            ### Reasoning Effort:
            The reasoning effort controls how complex or detailed the assistant's answers should be.
            - **Low**: Provides quick, concise answers with minimal reasoning or details.
            - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
            - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
            """)

        # Store PDF content as a state variable
        pdf_content = gr.State("")

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        # Input type selector
        with gr.Row():
            input_type = gr.Radio(
                ["Text", "Image", "Voice", "PDF"],
                label="Choose Input Type",
                value="Text",
            )

        # Create the input components (initially text is visible, others are hidden)
        with gr.Row():
            # Text input
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
                visible=True,
            )

            # Image input
            image_input = gr.Image(
                label="Upload an Image",
                type="pil",
                visible=False,
            )

            # Audio input
            audio_input = gr.Audio(
                label="Upload or Record Audio",
                type="filepath",
                visible=False,
            )

            # PDF input
            pdf_input = gr.File(
                label="Upload your PDF",
                file_types=[".pdf"],
                visible=False,
            )

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1",  # Default to 'o1' for image-related tasks
            )
            submit_btn = gr.Button("Ask!", elem_id="submit-btn")
            clear_btn = gr.Button("Clear History", elem_id="clear-history")

        chat_history = gr.Chatbot()

        def submit(text, image, audio, pdf_file, api_key, effort, model, pdf_text, history):
            """Forward a submission to ``chatbot`` with this session's history."""
            # Pass a copy of the Chatbot's current value so the handler never
            # falls back on its shared default list: histories stay separate
            # per session and "Clear History" really empties the conversation.
            return chatbot(text, image, audio, pdf_file, api_key, effort, model, pdf_text, list(history or []))

        # Connect the input type selector to the update function
        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, audio_input, pdf_input],
        )

        # Process PDF when uploaded. ``upload`` is used instead of ``change``
        # because ``change`` also fires when a submission resets the file
        # widget to None, which would wipe the stored PDF text right after it
        # was saved.
        pdf_input.upload(
            fn=process_pdf,
            inputs=[pdf_input],
            outputs=[pdf_content],
        )

        # Button interactions
        submit_btn.click(
            fn=submit,
            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content, chat_history],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history],
        )

        clear_btn.click(
            fn=clear_history,
            inputs=[],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history],
        )

    return demo
430
+
431
+ # Run the interface
432
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    demo = create_interface()
    demo.launch()