Spaces:

arad1367
/

Multimodal_RAG_Pejman

Runtime error

App Files Files Community

arad1367 committed on Oct 15, 2024

Commit

2eb1042

verified ·

1 Parent(s): bc890cd

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -49

app.py CHANGED Viewed

@@ -34,13 +34,13 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_rem
 @spaces.GPU()
 def process_pdf_and_query(pdf_file, user_query):
     # Convert the PDF to images
-    images = convert_from_path(pdf_file.name)
     num_images = len(images)
     # Indexing the PDF in RAG
     RAG.index(
         input_path=pdf_file.name,
-        index_name="image_index",
         store_collection_with_index=False,
         overwrite=True
     )
@@ -88,44 +88,9 @@ def process_pdf_and_query(pdf_file, user_query):
     return output_text[0], num_images
-css = """
-<style>
-    .title {
-        text-align: center;
-        font-size: 32px;
-        font-weight: bold;
-        margin-bottom: 20px;
-    }
-    .duplicate-button {
-        background-color: #FFD700; /* Yellow */
-        color: black;
-        border: none;
-        padding: 10px 20px;
-        cursor: pointer;
-        font-size: 16px;
-        border-radius: 5px;
-    }
-    .gr-button {
-        background-color: #4CAF50; /* Green */
-        color: white;
-        border: none;
-        padding: 10px 20px;
-        cursor: pointer;
-        font-size: 16px;
-        border-radius: 5px;
-    }
-</style>
-"""
-description = """
-<div style="text-align: center; margin-bottom: 20px;">
-    <p>Welcome to the Multimodal RAG interface! This tool allows you to query PDF documents using a combination of image and text information.</p>
-    <p>We utilize ColPali as a multimodal retriever, and Byaldi is a new library by Answer.ai that simplifies the use of ColPali. Our system incorporates the Qwen/Qwen2-VL-2B-Instruct LLM for generating insightful responses based on the information retrieved.</p>
-</div>
-"""
-pdf_input = gr.File(label="Upload PDF")
-query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
 output_text = gr.Textbox(label="Model Answer")
 output_images = gr.Textbox(label="Number of Images in PDF")
@@ -138,23 +103,34 @@ footer = """
     <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
     <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
     <br>
-    Made with 💖 by <a href="https://github.com/arad1367" target="_blank">Pejman Ebrahimi</a>
 </div>
 """
-# Launch the Gradio app
 demo = gr.Interface(
     fn=process_pdf_and_query,
-    inputs=[pdf_input, query_input],
     outputs=[output_text, output_images],
-    title="<div class='title'>Multimodal RAG with Image Query</div>",
-    description=description,
-    theme='freddyaboulton/dracula_revamped',
-    css=css
 )
-demo.add_component(gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button"))
-demo.add_component(gr.HTML(footer))
 demo.launch(debug=True)

 @spaces.GPU()
 def process_pdf_and_query(pdf_file, user_query):
     # Convert the PDF to images
+    images = convert_from_path(pdf_file.name)  # pdf_file.name gives the file path
     num_images = len(images)
     # Indexing the PDF in RAG
     RAG.index(
         input_path=pdf_file.name,
+        index_name="image_index",  # index will be saved at index_root/index_name/
         store_collection_with_index=False,
         overwrite=True
     )
     return output_text[0], num_images
+pdf_input = gr.File(label="Upload PDF")
+query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
 output_text = gr.Textbox(label="Model Answer")
 output_images = gr.Textbox(label="Number of Images in PDF")
     <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
     <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
     <br>
+    Made with 💖 by Pejman Ebrahimi
+</div>
+"""
+explanation = """
+<div style="text-align: center; margin-bottom: 20px;">
+    <h2 style="font-weight: bold; font-size: 24px;">Multimodal RAG (Retrieval-Augmented Generation)</h2>
+    <p>
+        This application utilizes the ColPali model as a multimodal retriever,
+        which retrieves relevant information from documents and generates answers
+        using the Qwen/Qwen2-VL-2B-Instruct LLM (Large Language Model)
+        via the Byaldi library, developed by Answer.ai.
+    </p>
 </div>
 """
 demo = gr.Interface(
     fn=process_pdf_and_query,
+    inputs=[pdf_input, query_input],
     outputs=[output_text, output_images],
+    title="Multimodal RAG with Image Query - By <a href='https://github.com/arad1367'>Pejman Ebrahimi</a>",
+    theme='freddyaboulton/dracula_revamped',
 )
+with demo:
+    gr.HTML(explanation)
+    gr.HTML(footer)
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button", elem_id="duplicate-button")  # Duplicate button
+    gr.Button("Submit", elem_classes="submit-button", style={"background-color": "green", "color": "white"})  # Custom Submit Button
 demo.launch(debug=True)