Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import torch
|
|
| 9 |
import torchvision
|
| 10 |
import subprocess
|
| 11 |
|
| 12 |
-
# Run the commands from setup.sh to install poppler-utils
|
| 13 |
def install_poppler():
|
| 14 |
try:
|
| 15 |
subprocess.run(["pdfinfo"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
@@ -34,7 +34,7 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_rem
|
|
| 34 |
@spaces.GPU()
|
| 35 |
def process_pdf_and_query(pdf_file, user_query):
|
| 36 |
# Convert the PDF to images
|
| 37 |
-
images = convert_from_path(pdf_file.name)
|
| 38 |
num_images = len(images)
|
| 39 |
|
| 40 |
# Indexing the PDF in RAG
|
|
@@ -88,44 +88,9 @@ def process_pdf_and_query(pdf_file, user_query):
|
|
| 88 |
|
| 89 |
return output_text[0], num_images
|
| 90 |
|
| 91 |
-
|
| 92 |
-
pdf_input = gr.File(label="Upload PDF")
|
| 93 |
-
query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
|
| 94 |
-
output_text = gr.Textbox(label="Model Answer")
|
| 95 |
-
output_images = gr.Textbox(label="Number of Images in PDF")
|
| 96 |
-
|
| 97 |
-
# CSS styling
|
| 98 |
-
css = """
|
| 99 |
-
body {
|
| 100 |
-
background-color: #282a36;
|
| 101 |
-
font-family: Arial, sans-serif;
|
| 102 |
-
color: #f8f8f2;
|
| 103 |
-
}
|
| 104 |
-
h1 {
|
| 105 |
-
text-align: center;
|
| 106 |
-
font-size: 2.5em;
|
| 107 |
-
font-weight: bold;
|
| 108 |
-
margin-bottom: 20px;
|
| 109 |
-
}
|
| 110 |
-
footer {
|
| 111 |
-
margin-top: 20px;
|
| 112 |
-
}
|
| 113 |
-
.duplicate-button {
|
| 114 |
-
text-align: center;
|
| 115 |
-
background-color: #50fa7b;
|
| 116 |
-
color: #282a36;
|
| 117 |
-
font-weight: bold;
|
| 118 |
-
border: none;
|
| 119 |
-
padding: 10px;
|
| 120 |
-
cursor: pointer;
|
| 121 |
-
}
|
| 122 |
-
"""
|
| 123 |
-
|
| 124 |
description = """
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
In this demo, **ColPali** is used as a multimodal retriever, and the **Byaldi** library from answer.ai simplifies the use of ColPali. We are utilizing **Qwen2-VL-2B-Instruct** for text generation, enabling both text and image-based queries.
|
| 129 |
"""
|
| 130 |
|
| 131 |
footer = """
|
|
@@ -137,21 +102,24 @@ footer = """
|
|
| 137 |
<a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
|
| 138 |
<a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
|
| 139 |
<br>
|
| 140 |
-
Made with 💖 by
|
| 141 |
</div>
|
| 142 |
"""
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
gr.Markdown("<h1>Multimodal RAG with Image Query</h1>")
|
| 147 |
gr.Markdown(description)
|
| 148 |
-
with gr.Row():
|
| 149 |
-
pdf_input = gr.File(label="Upload PDF")
|
| 150 |
-
query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
|
| 151 |
-
output_text = gr.Textbox(label="Model Answer")
|
| 152 |
-
output_images = gr.Textbox(label="Number of Images in PDF")
|
| 153 |
|
| 154 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
gr.HTML(footer)
|
| 156 |
|
| 157 |
-
|
|
|
|
| 9 |
import torchvision
|
| 10 |
import subprocess
|
| 11 |
|
| 12 |
+
# Install poppler-utils from app.py at startup instead of via a separate setup.sh script; this step is required. (Note by Pejman.)
|
| 13 |
def install_poppler():
|
| 14 |
try:
|
| 15 |
subprocess.run(["pdfinfo"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
|
|
| 34 |
@spaces.GPU()
|
| 35 |
def process_pdf_and_query(pdf_file, user_query):
|
| 36 |
# Convert the PDF to images
|
| 37 |
+
images = convert_from_path(pdf_file.name)
|
| 38 |
num_images = len(images)
|
| 39 |
|
| 40 |
# Indexing the PDF in RAG
|
|
|
|
| 88 |
|
| 89 |
return output_text[0], num_images
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
description = """
|
| 92 |
+
**Multimodal RAG** is a retrieval-augmented generation (RAG) model that works with multiple modalities, such as text and images, to retrieve relevant information from a knowledge base and generate coherent responses. In this demo, we use **ColPali**, a multimodal retriever capable of efficiently retrieving from large datasets, along with **Qwen2-VL-2B-Instruct**, a powerful large language model for answering questions based on the retrieved information.
|
| 93 |
+
Byaldi, developed by **Answer.ai**, is used to simplify the integration of ColPali into our pipeline.
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
|
| 96 |
footer = """
|
|
|
|
| 102 |
<a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
|
| 103 |
<a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
|
| 104 |
<br>
|
| 105 |
+
Made with 💖 by Pejman Ebrahimi
|
| 106 |
</div>
|
| 107 |
"""
|
| 108 |
|
| 109 |
+
with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
|
| 110 |
+
gr.Markdown("<h1 style='text-align: center; font-weight: bold;'>Multimodal RAG with Image Query - By <a href='https://github.com/arad1367' target='_blank'>Pejman Ebrahimi</a></h1>")
|
|
|
|
| 111 |
gr.Markdown(description)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
pdf_input = gr.File(label="Upload PDF")
|
| 114 |
+
query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
|
| 115 |
+
output_text = gr.Textbox(label="Model Answer")
|
| 116 |
+
output_images = gr.Textbox(label="Number of Images in PDF")
|
| 117 |
+
duplicate_button = gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
|
| 118 |
+
|
| 119 |
+
gr.Row([pdf_input, query_input])
|
| 120 |
+
gr.Row([output_text, output_images])
|
| 121 |
+
gr.Row([duplicate_button])
|
| 122 |
+
|
| 123 |
gr.HTML(footer)
|
| 124 |
|
| 125 |
+
demo.launch(debug=True)
|