arad1367 committed on
Commit
2eb1042
·
verified ·
1 Parent(s): bc890cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -49
app.py CHANGED
@@ -34,13 +34,13 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_rem
34
  @spaces.GPU()
35
  def process_pdf_and_query(pdf_file, user_query):
36
  # Convert the PDF to images
37
- images = convert_from_path(pdf_file.name)
38
  num_images = len(images)
39
 
40
  # Indexing the PDF in RAG
41
  RAG.index(
42
  input_path=pdf_file.name,
43
- index_name="image_index",
44
  store_collection_with_index=False,
45
  overwrite=True
46
  )
@@ -88,44 +88,9 @@ def process_pdf_and_query(pdf_file, user_query):
88
 
89
  return output_text[0], num_images
90
 
91
- css = """
92
- <style>
93
- .title {
94
- text-align: center;
95
- font-size: 32px;
96
- font-weight: bold;
97
- margin-bottom: 20px;
98
- }
99
- .duplicate-button {
100
- background-color: #FFD700; /* Yellow */
101
- color: black;
102
- border: none;
103
- padding: 10px 20px;
104
- cursor: pointer;
105
- font-size: 16px;
106
- border-radius: 5px;
107
- }
108
- .gr-button {
109
- background-color: #4CAF50; /* Green */
110
- color: white;
111
- border: none;
112
- padding: 10px 20px;
113
- cursor: pointer;
114
- font-size: 16px;
115
- border-radius: 5px;
116
- }
117
- </style>
118
- """
119
-
120
- description = """
121
- <div style="text-align: center; margin-bottom: 20px;">
122
- <p>Welcome to the Multimodal RAG interface! This tool allows you to query PDF documents using a combination of image and text information.</p>
123
- <p>We utilize ColPali as a multimodal retriever, and Byaldi is a new library by Answer.ai that simplifies the use of ColPali. Our system incorporates the Qwen/Qwen2-VL-2B-Instruct LLM for generating insightful responses based on the information retrieved.</p>
124
- </div>
125
- """
126
 
127
- pdf_input = gr.File(label="Upload PDF")
128
- query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
129
  output_text = gr.Textbox(label="Model Answer")
130
  output_images = gr.Textbox(label="Number of Images in PDF")
131
 
@@ -138,23 +103,34 @@ footer = """
138
  <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
139
  <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
140
  <br>
141
- Made with 💖 by <a href="https://github.com/arad1367" target="_blank">Pejman Ebrahimi</a>
 
 
 
 
 
 
 
 
 
 
 
 
142
  </div>
143
  """
144
 
145
- # Launch the Gradio app
146
  demo = gr.Interface(
147
  fn=process_pdf_and_query,
148
- inputs=[pdf_input, query_input],
149
  outputs=[output_text, output_images],
150
- title="<div class='title'>Multimodal RAG with Image Query</div>",
151
- description=description,
152
- theme='freddyaboulton/dracula_revamped',
153
- css=css
154
  )
155
 
156
- demo.add_component(gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button"))
157
-
158
- demo.add_component(gr.HTML(footer))
 
 
159
 
160
  demo.launch(debug=True)
 
34
  @spaces.GPU()
35
  def process_pdf_and_query(pdf_file, user_query):
36
  # Convert the PDF to images
37
+ images = convert_from_path(pdf_file.name) # pdf_file.name gives the file path
38
  num_images = len(images)
39
 
40
  # Indexing the PDF in RAG
41
  RAG.index(
42
  input_path=pdf_file.name,
43
+ index_name="image_index", # index will be saved at index_root/index_name/
44
  store_collection_with_index=False,
45
  overwrite=True
46
  )
 
88
 
89
  return output_text[0], num_images
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
+ pdf_input = gr.File(label="Upload PDF")
93
+ query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
94
  output_text = gr.Textbox(label="Model Answer")
95
  output_images = gr.Textbox(label="Number of Images in PDF")
96
 
 
103
  <a href="https://github.com/AnswerDotAI/byaldi" target="_blank">Byaldi</a> |
104
  <a href="https://github.com/illuin-tech/colpali" target="_blank">ColPali</a>
105
  <br>
106
+ Made with 💖 by Pejman Ebrahimi
107
+ </div>
108
+ """
109
+
110
+ explanation = """
111
+ <div style="text-align: center; margin-bottom: 20px;">
112
+ <h2 style="font-weight: bold; font-size: 24px;">Multimodal RAG (Retrieval-Augmented Generation)</h2>
113
+ <p>
114
+ This application utilizes the ColPali model as a multimodal retriever,
115
+ which retrieves relevant information from documents and generates answers
116
+ using the Qwen/Qwen2-VL-2B-Instruct LLM (Large Language Model)
117
+ via the Byaldi library, developed by Answer.ai.
118
+ </p>
119
  </div>
120
  """
121
 
 
122
  demo = gr.Interface(
123
  fn=process_pdf_and_query,
124
+ inputs=[pdf_input, query_input],
125
  outputs=[output_text, output_images],
126
+ title="Multimodal RAG with Image Query - By <a href='https://github.com/arad1367'>Pejman Ebrahimi</a>",
127
+ theme='freddyaboulton/dracula_revamped',
 
 
128
  )
129
 
130
+ with demo:
131
+ gr.HTML(explanation)
132
+ gr.HTML(footer)
133
+ gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button", elem_id="duplicate-button") # Duplicate button
134
+ gr.Button("Submit", elem_classes="submit-button", style={"background-color": "green", "color": "white"}) # Custom Submit Button
135
 
136
  demo.launch(debug=True)