Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
```diff
@@ -9,7 +9,7 @@ import torch
 import torchvision
 import subprocess
 
-# Run the commands from setup.sh to install poppler-utils
+# Run the commands from setup.sh to install poppler-utils -- This is necessary --> Noted by Pejman! --> I found this way instead of setup.sh
 def install_poppler():
     try:
         subprocess.run(["pdfinfo"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -34,7 +34,7 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_rem
 @spaces.GPU()
 def process_pdf_and_query(pdf_file, user_query):
     # Convert the PDF to images
-    images = convert_from_path(pdf_file.name)
+    images = convert_from_path(pdf_file.name)
     num_images = len(images)
 
     # Indexing the PDF in RAG
@@ -88,44 +88,9 @@ def process_pdf_and_query(pdf_file, user_query):
 
     return output_text[0], num_images
 
-
-pdf_input = gr.File(label="Upload PDF")
-query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
-output_text = gr.Textbox(label="Model Answer")
-output_images = gr.Textbox(label="Number of Images in PDF")
-
-# CSS styling
-css = """
-body {
-    background-color: #282a36;
-    font-family: Arial, sans-serif;
-    color: #f8f8f2;
-}
-h1 {
-    text-align: center;
-    font-size: 2.5em;
-    font-weight: bold;
-    margin-bottom: 20px;
-}
-footer {
-    margin-top: 20px;
-}
-.duplicate-button {
-    text-align: center;
-    background-color: #50fa7b;
-    color: #282a36;
-    font-weight: bold;
-    border: none;
-    padding: 10px;
-    cursor: pointer;
-}
-"""
-
 description = """
-
-
-
-In this demo, **ColPali** is used as a multimodal retriever, and the **Byaldi** library from answer.ai simplifies the use of ColPali. We are utilizing **Qwen2-VL-2B-Instruct** for text generation, enabling both text and image-based queries.
+**Multimodal RAG** is a retrieval-augmented generation (RAG) model that works with multiple modalities, such as text and images, to retrieve relevant information from a knowledge base and generate coherent responses. In this demo, we use **ColPali**, a multimodal retriever capable of efficiently retrieving from large datasets, along with **Qwen2-VL-2B-Instruct**, a powerful large language model for answering questions based on the retrieved information.
+Byaldi, developed by **Answer.ai**, is used to simplify the integration of ColPali into our pipeline.
 """
 
 footer = """
@@ -137,21 +102,24 @@ footer = """
     <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
     <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
     <br>
-    Made with π by
+    Made with π by Pejman Ebrahimi
 </div>
 """
 
-
-
-gr.Markdown("<h1>Multimodal RAG with Image Query</h1>")
+with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
+    gr.Markdown("<h1 style='text-align: center; font-weight: bold;'>Multimodal RAG with Image Query - By <a href='https://github.com/arad1367' target='_blank'>Pejman Ebrahimi</a></h1>")
     gr.Markdown(description)
-with gr.Row():
-    pdf_input = gr.File(label="Upload PDF")
-    query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
-    output_text = gr.Textbox(label="Model Answer")
-    output_images = gr.Textbox(label="Number of Images in PDF")
 
-gr.
+    pdf_input = gr.File(label="Upload PDF")
+    query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
+    output_text = gr.Textbox(label="Model Answer")
+    output_images = gr.Textbox(label="Number of Images in PDF")
+    duplicate_button = gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
+
+    gr.Row([pdf_input, query_input])
+    gr.Row([output_text, output_images])
+    gr.Row([duplicate_button])
+
     gr.HTML(footer)
 
-
+demo.launch(debug=True)
```
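A note on the first hunk: only the opening lines of `install_poppler()` are visible, probing for `pdfinfo` with `subprocess`. Below is a minimal sketch of how such a check-then-install helper is typically completed; the `apt-get` fallback and the exception handling are assumptions, not code shown in this commit.

```python
import subprocess

def install_poppler():
    """Ensure poppler-utils (pdfinfo/pdftoppm) is available for pdf2image."""
    try:
        # pdfinfo exits non-zero when called without arguments, so only a
        # missing binary (FileNotFoundError) is treated as "not installed".
        subprocess.run(["pdfinfo"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        # Assumed fallback (not shown in the hunk): install at runtime
        # instead of relying on a setup.sh step.
        subprocess.run(["apt-get", "update"], check=True)
        subprocess.run(["apt-get", "install", "-y", "poppler-utils"], check=True)
```

On Hugging Face Spaces, listing `poppler-utils` in a `packages.txt` file is the more conventional way to get apt packages installed at build time; the runtime check above simply mirrors what the updated comment describes.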
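The second and third hunks touch `process_pdf_and_query`, but its body (the actual indexing and generation) is elided from this view. For orientation, here is a hedged sketch of how a ColPali-via-Byaldi retrieval step combined with Qwen2-VL generation is commonly wired; the retriever checkpoint `vidore/colpali`, the index name, and the generation settings are assumptions, and the Space's real implementation may differ.

```python
import torch
from byaldi import RAGMultiModalModel
from pdf2image import convert_from_path
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
rag = RAGMultiModalModel.from_pretrained("vidore/colpali")  # assumed retriever checkpoint

# In the Space this function is wrapped with @spaces.GPU() for ZeroGPU allocation.
def process_pdf_and_query(pdf_file, user_query):
    # Convert the PDF to one PIL image per page (requires poppler-utils).
    images = convert_from_path(pdf_file.name)
    num_images = len(images)

    # Index the PDF so ColPali can score pages against the query.
    rag.index(input_path=pdf_file.name, index_name="pdf_index",  # assumed index name
              store_collection_with_index=False, overwrite=True)
    best = rag.search(user_query, k=1)[0]
    page_image = images[best.page_num - 1]  # Byaldi page numbers are 1-based

    # Ask Qwen2-VL about the retrieved page.
    messages = [{"role": "user", "content": [
        {"type": "image", "image": page_image},
        {"type": "text", "text": user_query},
    ]}]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                       padding=True, return_tensors="pt").to(model.device)
    generated = model.generate(**inputs, max_new_tokens=512)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated)]
    output_text = processor.batch_decode(trimmed, skip_special_tokens=True,
                                         clean_up_tokenization_spaces=False)
    return output_text[0], num_images
```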
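Finally, the UI hunk defines the Gradio components and rows, but the line that connects the inputs to `process_pdf_and_query` is cut off in this view (the truncated `gr.` line on the removed side), so the event wiring is not visible. The sketch below shows one common way to wire a `gr.Blocks` layout to the handler; the submit button is purely illustrative and not part of this commit. It also uses layout containers as context managers (`with gr.Row(): ...`), since whether `gr.Row([...])` re-parents components that were already created depends on the Gradio version.

```python
import gradio as gr

def process_pdf_and_query(pdf_file, user_query):
    # Stand-in for the real handler defined earlier in app.py.
    return "answer", 0

with gr.Blocks(theme="freddyaboulton/dracula_revamped") as demo:
    gr.Markdown("<h1>Multimodal RAG with Image Query</h1>")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        query_input = gr.Textbox(label="Enter your query",
                                 placeholder="Ask a question about the PDF")
    with gr.Row():
        output_text = gr.Textbox(label="Model Answer")
        output_images = gr.Textbox(label="Number of Images in PDF")
    duplicate_button = gr.DuplicateButton(value="Duplicate Space for private use")
    submit_btn = gr.Button("Submit")  # hypothetical: the commit's wiring line is truncated
    submit_btn.click(process_pdf_and_query,
                     inputs=[pdf_input, query_input],
                     outputs=[output_text, output_images])

demo.launch(debug=True)
```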