shukdevdatta123 committed
Commit 02e9dd4 · verified · 1 Parent(s): ea2202e

Update app.py

Files changed (1):
  1. app.py +56 -289
app.py CHANGED
@@ -6,53 +6,6 @@ import io
  import os
  import tempfile
  import fitz  # PyMuPDF for PDF handling
- import uuid
- import json
-
- # Class to manage document storage
- class DocumentManager:
-     def __init__(self):
-         self.documents = {}  # Dictionary to store documents: {doc_id: {"name": name, "content": content, "path": path}}
-
-     def add_document(self, file_path, file_name=None):
-         """Add a document to the manager and return its ID"""
-         if file_name is None:
-             file_name = os.path.basename(file_path)
-
-         doc_id = str(uuid.uuid4())
-         content = extract_text_from_pdf(file_path)
-
-         self.documents[doc_id] = {
-             "name": file_name,
-             "content": content,
-             "path": file_path
-         }
-
-         return doc_id
-
-     def get_document_content(self, doc_id):
-         """Get the content of a document by its ID"""
-         if doc_id in self.documents:
-             return self.documents[doc_id]["content"]
-         return ""
-
-     def get_document_path(self, doc_id):
-         """Get the file path of a document by its ID"""
-         if doc_id in self.documents:
-             return self.documents[doc_id]["path"]
-         return None
-
-     def get_document_list(self):
-         """Get a list of document names and IDs for dropdown"""
-         return [(self.documents[doc_id]["name"], doc_id) for doc_id in self.documents]
-
-     def clear_documents(self):
-         """Clear all documents"""
-         self.documents = {}
-         return []
-
- # Initialize the document manager
- document_manager = DocumentManager()

  # Function to extract text from PDF files
  def extract_text_from_pdf(pdf_file):
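
The body of `extract_text_from_pdf` sits outside this hunk. For orientation only, here is a minimal sketch of what such a helper typically looks like with PyMuPDF (the `fitz` import above); the function name and details here are illustrative assumptions, not necessarily the implementation in app.py:

```python
import fitz  # PyMuPDF

def extract_text_from_pdf_sketch(pdf_path):
    """Illustrative only: concatenate the text of every page of a PDF."""
    parts = []
    with fitz.open(pdf_path) as doc:  # open the document from a file path
        for page in doc:
            parts.append(page.get_text())
    return "\n".join(parts)
```
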
@@ -148,76 +101,48 @@ def transcribe_audio(audio, openai_api_key):
      except Exception as e:
          return f"Error transcribing audio: {str(e)}"

- # Function to handle PDF uploads
- def handle_pdf_upload(pdf_file):
-     if pdf_file is None:
-         return [], None
-
-     # Add the PDF to the document manager
-     doc_id = document_manager.add_document(pdf_file.name)
-
-     # Return updated dropdown list and the selected document ID
-     doc_list = document_manager.get_document_list()
-     # Only set the value if the list is not empty
-     selected_value = doc_id if doc_list else None
-
-     return doc_list, selected_value
-
- # Function to get PDF content based on selected document
- def get_selected_document_content(doc_id):
-     if not doc_id:
-         return "", None
-
-     # Get the document path for the PDF viewer
-     doc_path = document_manager.get_document_path(doc_id)
-
-     # Return the document content for the AI and the path for the viewer
-     return document_manager.get_document_content(doc_id), doc_path
-
  # The function that will be used by Gradio interface
- def chatbot(input_text, image, audio, pdf_file, doc_selection, openai_api_key, reasoning_effort, model_choice, current_pdf_content, history=[]):
      # If there's audio, transcribe it to text
      if audio:
          input_text = transcribe_audio(audio, openai_api_key)

-     # Determine which PDF content to use
-     pdf_content_to_use = current_pdf_content

      # Generate the response
-     response = generate_response(input_text, image, pdf_content_to_use, openai_api_key, reasoning_effort, model_choice)

      # Append the response to the history
      if input_text:
-         if doc_selection:
-             # Include the document name in the history
-             doc_name = next((doc[0] for doc in document_manager.get_document_list() if doc[1] == doc_selection), "Unknown Document")
-             history.append((f"User: {input_text} [Query on: {doc_name}]", f"Assistant: {response}"))
-         else:
-             history.append((f"User: {input_text}", f"Assistant: {response}"))
      else:
          history.append((f"User: [Uploaded content]", f"Assistant: {response}"))

-     return "", None, None, None, doc_selection, current_pdf_content, history

- # Function to clear the chat history and reset selected document
  def clear_history():
-     return "", None, None, None, None, "", []

- # Function to clear all documents
- def clear_documents():
-     document_list = document_manager.clear_documents()
-     return document_list, None, "", None

  # Function to update visible components based on input type selection
  def update_input_type(choice):
      if choice == "Text":
-         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
      elif choice == "Image":
-         return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
      elif choice == "Voice":
-         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
      elif choice == "PDF":
-         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)

  # Custom CSS styles with animations and button colors
  custom_css = """
@@ -350,29 +275,6 @@ custom_css = """
      margin-left: auto;
      animation: slideInAssistant 0.5s ease-out;
  }
- /* PDF preview panel */
- .pdf-preview-panel {
-     border: 2px solid #ccc;
-     border-radius: 8px;
-     overflow: hidden;
-     height: 600px;
-     background-color: #f5f5f5;
- }
- /* PDF viewer iframe */
- .pdf-viewer {
-     width: 100%;
-     height: 100%;
-     border: none;
- }
- /* Split view container */
- .split-view-container {
-     display: flex;
-     gap: 20px;
- }
- .split-view-panel {
-     flex: 1;
-     min-width: 0; /* Allow panels to shrink below their content size */
- }
  /* Animation keyframes */
  @keyframes fadeIn {
      0% { opacity: 0; }
@@ -386,27 +288,6 @@ custom_css = """
      0% { transform: translateX(100%); }
      100% { transform: translateX(0); }
  }
- /* Document management styles */
- .document-manager {
-     background-color: #fff;
-     border-radius: 10px;
-     padding: 15px;
-     box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-     margin-bottom: 20px;
- }
- .document-manager-header {
-     display: flex;
-     justify-content: space-between;
-     align-items: center;
-     margin-bottom: 15px;
- }
- .document-list {
-     max-height: 200px;
-     overflow-y: auto;
-     border: 1px solid #eee;
-     border-radius: 5px;
-     padding: 10px;
- }
  /* Mobile responsiveness */
  @media (max-width: 768px) {
      .gradio-header h1 {
@@ -425,9 +306,6 @@ custom_css = """
          width: 100%;
          margin-left: 0;
      }
-     .split-view-container {
-         flex-direction: column;
-     }
  }
  """
 
@@ -436,8 +314,8 @@ def create_interface():
      with gr.Blocks(css=custom_css) as demo:
          gr.Markdown("""
          <div class="gradio-header">
-             <h1>Enhanced Multimodal Chatbot</h1>
-             <h3>Interact with text, images, voice, and multiple PDFs</h3>
          </div>
          """)

@@ -445,26 +323,23 @@ def create_interface():
          with gr.Accordion("Click to expand for details", open=False):
              gr.Markdown("""
              ### Description:
-             This enhanced multimodal chatbot handles text, image, voice, and PDF inputs with advanced document management.
-
-             - **Text Mode**: Ask questions or provide text for the assistant to respond.
-             - **Image Mode**: Upload an image for the assistant to analyze and discuss.
-             - **Voice Mode**: Upload or record audio that will be transcribed and processed.
-             - **PDF Mode**: Upload multiple PDFs, select which one to query, and view them side-by-side with the chat.
-
-             ### PDF Features:
-             - Upload and manage multiple PDFs in a single session
-             - Select which document to query from a dropdown menu
-             - View PDFs side-by-side with the chat interface
-             - Clear document library as needed
-
-             ### Model Options:
-             - "o1" is for image, voice, PDF and text chat
-             - "o3-mini" is for text, PDF and voice chat only
              """)

          # Store PDF content as a state variable
-         current_pdf_content = gr.State("")

          with gr.Row():
              openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
@@ -501,162 +376,54 @@ def create_interface():
                  visible=False
              )

-             # PDF input and document selection components
              pdf_input = gr.File(
                  label="Upload your PDF",
                  file_types=[".pdf"],
                  visible=False
              )
-
-             # Dropdown for document selection
-             doc_selection = gr.Dropdown(
-                 label="Select Document to Query",
-                 choices=[],
-                 interactive=True,
-                 visible=False
-             )
-
-             # PDF Viewer (initially hidden)
-             pdf_viewer = gr.HTML(
-                 label="PDF Preview",
-                 visible=False
-             )

-         # Action buttons row
          with gr.Row():
-             with gr.Column(scale=1):
-                 reasoning_effort = gr.Dropdown(
-                     label="Reasoning Effort",
-                     choices=["low", "medium", "high"],
-                     value="medium"
-                 )
-
-             with gr.Column(scale=1):
-                 model_choice = gr.Dropdown(
-                     label="Select Model",
-                     choices=["o1", "o3-mini"],
-                     value="o1"
-                 )
-
-             with gr.Column(scale=1):
-                 submit_btn = gr.Button("Ask!", elem_id="submit-btn")
-
-             with gr.Column(scale=1):
-                 clear_chat_btn = gr.Button("Clear Chat", elem_id="clear-history")
-
-             with gr.Column(scale=1, visible=False) as clear_docs_col:
-                 clear_docs_btn = gr.Button("Clear All Documents", elem_id="clear-docs")
-
-         # Create a container for the split view layout when in PDF mode
-         with gr.Row(visible=False) as split_view_container:
-             with gr.Column(scale=1, elem_classes="split-view-panel") as pdf_panel:
-                 pdf_display = gr.HTML(
-                     """<div class="pdf-preview-panel">
-                     <iframe class="pdf-viewer" id="pdf-viewer" src="about:blank"></iframe>
-                     </div>"""
-                 )
-
-             with gr.Column(scale=1, elem_classes="split-view-panel") as chat_panel:
-                 chat_history = gr.Chatbot()
-
-         # Regular chat history display (when not in split view)
-         with gr.Row(visible=True) as regular_chat_container:
-             chat_history_regular = gr.Chatbot()
-
-         # Function to handle selection of a document from dropdown
-         def handle_doc_selection(doc_id):
-             if not doc_id:
-                 return "", update_pdf_viewer(None)
-
-             content, path = get_selected_document_content(doc_id)
-             return content, update_pdf_viewer(path)
-
-         # Function to update the PDF viewer
-         def update_pdf_viewer(pdf_path):
-             if not pdf_path:
-                 return """<div class="pdf-preview-panel">
-                 <div style="padding: 20px; text-align: center;">No PDF selected</div>
-                 </div>"""
-
-             # Create a data URL or temporary file path to display the PDF
-             return f"""<div class="pdf-preview-panel">
-                 <iframe class="pdf-viewer" id="pdf-viewer" src="file={pdf_path}" type="application/pdf"></iframe>
-                 </div>"""

-         # Function to toggle between split view and regular view based on input type
-         def toggle_view(choice):
-             if choice == "PDF":
-                 return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
-             else:
-                 return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

          # Connect the input type selector to the update function
          input_type.change(
              fn=update_input_type,
              inputs=[input_type],
-             outputs=[input_text, image_input, audio_input, pdf_input, doc_selection, pdf_viewer]
-         )
-
-         # Toggle between split view and regular view when input type changes
-         input_type.change(
-             fn=toggle_view,
-             inputs=[input_type],
-             outputs=[split_view_container, regular_chat_container, clear_docs_col]
          )

          # Process PDF when uploaded
          pdf_input.change(
-             fn=handle_pdf_upload,
              inputs=[pdf_input],
-             outputs=[doc_selection, doc_selection]
-         )
-
-         # Update content when document is selected
-         doc_selection.change(
-             fn=handle_doc_selection,
-             inputs=[doc_selection],
-             outputs=[current_pdf_content, pdf_display]
          )

          # Button interactions
          submit_btn.click(
              fn=chatbot,
-             inputs=[
-                 input_text, image_input, audio_input, pdf_input,
-                 doc_selection, openai_api_key, reasoning_effort,
-                 model_choice, current_pdf_content, chat_history_regular  # Added chat_history_regular to avoid creating new empty list
-             ],
-             outputs=[
-                 input_text, image_input, audio_input, pdf_input,
-                 doc_selection, current_pdf_content, chat_history_regular
-             ]
-         )
-
-         # Also update the split view chat history when submitting
-         submit_btn.click(
-             fn=lambda history: history,
-             inputs=[chat_history_regular],
-             outputs=[chat_history]
          )

-         clear_chat_btn.click(
              fn=clear_history,
              inputs=[],
-             outputs=[input_text, image_input, audio_input, pdf_input, doc_selection, current_pdf_content, chat_history_regular]
-         )
-
-         # Also clear the split view chat history
-         clear_chat_btn.click(
-             fn=lambda: [],
-             inputs=[],
-             outputs=[chat_history]
-         )
-
-         # Clear all documents
-         clear_docs_btn.click(
-             fn=clear_documents,
-             inputs=[],
-             outputs=[doc_selection, doc_selection, current_pdf_content, pdf_display]
          )

      return demo
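
`create_interface()` only builds and returns the Blocks app; the entry point that launches it is not part of this diff. Assuming the usual Gradio pattern, it would look something like the following hypothetical launcher:

```python
# Hypothetical launcher; the actual call in app.py is outside the hunks shown here.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
```
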
 
app.py (updated)

  import os
  import tempfile
  import fitz  # PyMuPDF for PDF handling

  # Function to extract text from PDF files
  def extract_text_from_pdf(pdf_file):
 
      except Exception as e:
          return f"Error transcribing audio: {str(e)}"

  # The function that will be used by Gradio interface
+ def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, history=[]):
      # If there's audio, transcribe it to text
      if audio:
          input_text = transcribe_audio(audio, openai_api_key)

+     # If a new PDF is uploaded, extract its text
+     new_pdf_content = pdf_content
+     if pdf_file is not None:
+         new_pdf_content = extract_text_from_pdf(pdf_file)

      # Generate the response
+     response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)

      # Append the response to the history
      if input_text:
+         history.append((f"User: {input_text}", f"Assistant: {response}"))
      else:
          history.append((f"User: [Uploaded content]", f"Assistant: {response}"))

+     return "", None, None, None, new_pdf_content, history

+ # Function to clear the chat history and PDF content
  def clear_history():
+     return "", None, None, None, "", []

+ # Function to process a newly uploaded PDF
+ def process_pdf(pdf_file):
+     if pdf_file is None:
+         return ""
+     return extract_text_from_pdf(pdf_file)

  # Function to update visible components based on input type selection
  def update_input_type(choice):
      if choice == "Text":
+         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
      elif choice == "Image":
+         return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
      elif choice == "Voice":
+         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
      elif choice == "PDF":
+         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

  # Custom CSS styles with animations and button colors
  custom_css = """
 
      margin-left: auto;
      animation: slideInAssistant 0.5s ease-out;
  }
  /* Animation keyframes */
  @keyframes fadeIn {
      0% { opacity: 0; }

      0% { transform: translateX(100%); }
      100% { transform: translateX(0); }
  }
  /* Mobile responsiveness */
  @media (max-width: 768px) {
      .gradio-header h1 {

          width: 100%;
          margin-left: 0;
      }
  }
  """

      with gr.Blocks(css=custom_css) as demo:
          gr.Markdown("""
          <div class="gradio-header">
+             <h1>Multimodal Chatbot (Text + Image + Voice + PDF)</h1>
+             <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
          </div>
          """)

          with gr.Accordion("Click to expand for details", open=False):
              gr.Markdown("""
              ### Description:
+             This is a multimodal chatbot that can handle text, image, voice, and PDF inputs.
+             - You can ask questions or provide text, and the assistant will respond.
+             - You can upload an image, and the assistant will process it and answer questions about the image.
+             - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
+             - PDF support: Upload a PDF and ask questions about its content.
+             - Enter your OpenAI API key to start interacting with the model.
+             - You can use the 'Clear History' button to remove the conversation history.
+             - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
+             ### Reasoning Effort:
+             The reasoning effort controls how complex or detailed the assistant's answers should be.
+             - **Low**: Provides quick, concise answers with minimal reasoning or details.
+             - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
+             - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
              """)

          # Store PDF content as a state variable
+         pdf_content = gr.State("")

          with gr.Row():
              openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
 
                  visible=False
              )

+             # PDF input
              pdf_input = gr.File(
                  label="Upload your PDF",
                  file_types=[".pdf"],
                  visible=False
              )

          with gr.Row():
+             reasoning_effort = gr.Dropdown(
+                 label="Reasoning Effort",
+                 choices=["low", "medium", "high"],
+                 value="medium"
+             )
+             model_choice = gr.Dropdown(
+                 label="Select Model",
+                 choices=["o1", "o3-mini"],
+                 value="o1"  # Default to 'o1' for image-related tasks
+             )
+             submit_btn = gr.Button("Ask!", elem_id="submit-btn")
+             clear_btn = gr.Button("Clear History", elem_id="clear-history")

+         chat_history = gr.Chatbot()

          # Connect the input type selector to the update function
          input_type.change(
              fn=update_input_type,
              inputs=[input_type],
+             outputs=[input_text, image_input, audio_input, pdf_input]
          )

          # Process PDF when uploaded
          pdf_input.change(
+             fn=process_pdf,
              inputs=[pdf_input],
+             outputs=[pdf_content]
          )

          # Button interactions
          submit_btn.click(
              fn=chatbot,
+             inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content],
+             outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
          )

+         clear_btn.click(
              fn=clear_history,
              inputs=[],
+             outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
          )

      return demo
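
The accordion text above explains the low/medium/high reasoning-effort levels and the o1 / o3-mini model choice, but `generate_response` itself lies outside this diff. As a hedged sketch (not the author's implementation), the two dropdown values would typically be forwarded to the OpenAI Chat Completions API roughly as follows; the helper name and prompt layout are assumptions, and image handling is omitted:

```python
from openai import OpenAI

def generate_response_sketch(input_text, pdf_content, openai_api_key, reasoning_effort, model_choice):
    """Illustrative only: how reasoning_effort and model_choice might reach the API."""
    client = OpenAI(api_key=openai_api_key)
    # Fold any extracted PDF text into the prompt alongside the user's question.
    prompt = f"{input_text}\n\nPDF content:\n{pdf_content}" if pdf_content else input_text
    completion = client.chat.completions.create(
        model=model_choice,                 # "o1" or "o3-mini"
        reasoning_effort=reasoning_effort,  # "low", "medium", or "high"
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
```
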