arad1367 committed on
Commit
bc890cd
·
verified ·
1 Parent(s): 0a97d2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -20
app.py CHANGED
@@ -9,7 +9,7 @@ import torch
9
  import torchvision
10
  import subprocess
11
 
12
- # Run the commands from setup.sh to install poppler-utils -- This is necessary --> Noted by Pejman! --> I found this way instead of setup.sh
13
  def install_poppler():
14
  try:
15
  subprocess.run(["pdfinfo"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -34,13 +34,13 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_rem
34
  @spaces.GPU()
35
  def process_pdf_and_query(pdf_file, user_query):
36
  # Convert the PDF to images
37
- images = convert_from_path(pdf_file.name)
38
  num_images = len(images)
39
 
40
  # Indexing the PDF in RAG
41
  RAG.index(
42
  input_path=pdf_file.name,
43
- index_name="image_index", # index will be saved at index_root/index_name/
44
  store_collection_with_index=False,
45
  overwrite=True
46
  )
@@ -88,11 +88,47 @@ def process_pdf_and_query(pdf_file, user_query):
88
 
89
  return output_text[0], num_images
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  description = """
92
- **Multimodal RAG** is a retrieval-augmented generation (RAG) model that works with multiple modalities, such as text and images, to retrieve relevant information from a knowledge base and generate coherent responses. In this demo, we use **ColPali**, a multimodal retriever capable of efficiently retrieving from large datasets, along with **Qwen2-VL-2B-Instruct**, a powerful large language model for answering questions based on the retrieved information.
93
- Byaldi, developed by **Answer.ai**, is used to simplify the integration of ColPali into our pipeline.
 
 
94
  """
95
 
 
 
 
 
 
96
  footer = """
97
  <div style="text-align: center; margin-top: 20px;">
98
  <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
@@ -102,24 +138,23 @@ footer = """
102
  <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
103
  <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
104
  <br>
105
- Made with 💖 by Pejman Ebrahimi
106
  </div>
107
  """
108
 
109
- with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
110
- gr.Markdown("<h1 style='text-align: center; font-weight: bold;'>Multimodal RAG with Image Query - By <a href='https://github.com/arad1367' target='_blank'>Pejman Ebrahimi</a></h1>")
111
- gr.Markdown(description)
112
-
113
- pdf_input = gr.File(label="Upload PDF")
114
- query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
115
- output_text = gr.Textbox(label="Model Answer")
116
- output_images = gr.Textbox(label="Number of Images in PDF")
117
- duplicate_button = gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
 
118
 
119
- gr.Row([pdf_input, query_input])
120
- gr.Row([output_text, output_images])
121
- gr.Row([duplicate_button])
122
 
123
- gr.HTML(footer)
124
 
125
- demo.launch(debug=True)
 
9
  import torchvision
10
  import subprocess
11
 
12
+ # Run the commands from setup.sh to install poppler-utils
13
  def install_poppler():
14
  try:
15
  subprocess.run(["pdfinfo"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
34
  @spaces.GPU()
35
  def process_pdf_and_query(pdf_file, user_query):
36
  # Convert the PDF to images
37
+ images = convert_from_path(pdf_file.name)
38
  num_images = len(images)
39
 
40
  # Indexing the PDF in RAG
41
  RAG.index(
42
  input_path=pdf_file.name,
43
+ index_name="image_index",
44
  store_collection_with_index=False,
45
  overwrite=True
46
  )
 
88
 
89
  return output_text[0], num_images
90
 
91
+ css = """
92
+ <style>
93
+ .title {
94
+ text-align: center;
95
+ font-size: 32px;
96
+ font-weight: bold;
97
+ margin-bottom: 20px;
98
+ }
99
+ .duplicate-button {
100
+ background-color: #FFD700; /* Yellow */
101
+ color: black;
102
+ border: none;
103
+ padding: 10px 20px;
104
+ cursor: pointer;
105
+ font-size: 16px;
106
+ border-radius: 5px;
107
+ }
108
+ .gr-button {
109
+ background-color: #4CAF50; /* Green */
110
+ color: white;
111
+ border: none;
112
+ padding: 10px 20px;
113
+ cursor: pointer;
114
+ font-size: 16px;
115
+ border-radius: 5px;
116
+ }
117
+ </style>
118
+ """
119
+
120
  description = """
121
+ <div style="text-align: center; margin-bottom: 20px;">
122
+ <p>Welcome to the Multimodal RAG interface! This tool allows you to query PDF documents using a combination of image and text information.</p>
123
+ <p>We utilize ColPali as a multimodal retriever, and Byaldi is a new library by Answer.ai that simplifies the use of ColPali. Our system incorporates the Qwen/Qwen2-VL-2B-Instruct LLM for generating insightful responses based on the information retrieved.</p>
124
+ </div>
125
  """
126
 
127
+ pdf_input = gr.File(label="Upload PDF")
128
+ query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
129
+ output_text = gr.Textbox(label="Model Answer")
130
+ output_images = gr.Textbox(label="Number of Images in PDF")
131
+
132
  footer = """
133
  <div style="text-align: center; margin-top: 20px;">
134
  <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
 
138
  <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
139
  <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
140
  <br>
141
+ Made with 💖 by <a href="https://github.com/arad1367" target="_blank">Pejman Ebrahimi</a>
142
  </div>
143
  """
144
 
145
+ # Launch the Gradio app
146
+ demo = gr.Interface(
147
+ fn=process_pdf_and_query,
148
+ inputs=[pdf_input, query_input],
149
+ outputs=[output_text, output_images],
150
+ title="<div class='title'>Multimodal RAG with Image Query</div>",
151
+ description=description,
152
+ theme='freddyaboulton/dracula_revamped',
153
+ css=css
154
+ )
155
 
156
+ demo.add_component(gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button"))
 
 
157
 
158
+ demo.add_component(gr.HTML(footer))
159
 
160
+ demo.launch(debug=True)