bokesyo committed on
Commit 1b2ece1 · verified · Parent: 9584a7d

Update app.py

Files changed (1)
  1. app.py +44 -5
app.py CHANGED

@@ -24,6 +24,7 @@ import torch
 import os
 import numpy as np
 import json
+from io import BytesIO
 
 cache_dir = '/data/kb_cache'
 os.makedirs(cache_dir, exist_ok=True)
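The call site for the new `BytesIO` import is outside this hunk; a plausible pattern it supports, sketched below purely as an assumption, is opening the uploaded PDF bytes in memory with PyMuPDF (recent versions accept a `dpi` argument to `get_pixmap`):

```python
# Sketch (not from the commit): render uploaded PDF bytes to PIL pages.
# Assumes PyMuPDF (pymupdf) and Pillow are installed.
import fitz  # PyMuPDF
from io import BytesIO
from PIL import Image

def pdf_bytes_to_images(pdf_file_binary: bytes) -> list:
    doc = fitz.open(stream=BytesIO(pdf_file_binary), filetype="pdf")
    pages = []
    for page in doc:
        pix = page.get_pixmap(dpi=200)  # rasterize one page
        pages.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
    return pages
```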
@@ -43,7 +44,8 @@ def calculate_md5_from_binary(binary_data):
 @spaces.GPU(duration=100)
 def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
     global model, tokenizer
-
+    model.eval()
+
     knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
 
     this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
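The `model.eval()` calls added here and below switch layers such as dropout into inference mode, so encoding the same page twice yields identical embeddings. A self-contained toy example of the difference (not taken from app.py):

```python
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.ones(1, 4)

net.train()                           # dropout active: outputs vary run to run
print(torch.equal(net(x), net(x)))    # usually False

net.eval()                            # dropout disabled: deterministic
with torch.no_grad():                 # also skips autograd bookkeeping
    print(torch.equal(net(x), net(x)))  # True
```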
@@ -88,6 +90,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
 def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     global model, tokenizer
 
+    model.eval()
+
     target_cache_dir = os.path.join(cache_dir, knowledge_base)
 
     if not os.path.exists(target_cache_dir):
@@ -180,9 +184,36 @@ device = 'cuda'
 model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+model.eval()
 model.to(device)
 
 
+def answer_question(images, question):
+    print("model load begin...")
+    gen_model_path = 'openbmb/MiniCPM-V-2_6'
+    gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
+    gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+    gen_model.eval()
+    gen_model.to(device)
+
+    print("model load success!")
+
+    images_ = [image.convert('RGB') for image in images]
+
+    msgs = [{'role': 'user', 'content': [*images_, question]}]
+
+    answer = gen_model.chat(
+        image=None,
+        msgs=msgs,
+        tokenizer=gen_tokenizer
+    )
+
+    print(answer)
+
+    return answer
+
+
+
 with gr.Blocks() as app:
     gr.Markdown("# Memex: OCR-free Visual Document Embedding Model as Your Personal Librarian")
     gr.Markdown("""The model takes only images as document-side inputs and produces vectors representing document pages. Memex is trained with over 200k query-document pairs, including textual documents, visual documents, arXiv figures, plots, charts, industry documents, textbooks, ebooks, and openly available PDFs. Its performance is on a par with our ablation text embedding model on text-oriented documents, and it has an advantage on visually intensive documents.
@@ -214,10 +245,6 @@ Our model is capable of:
     topk_input = gr.Number(value=3, minimum=1, maximum=5, step=1, label="Number of pages to retrieve")
     retrieve_button = gr.Button("Retrieve")
 
-    with gr.Row():
-        downvote_button = gr.Button("🤣Downvote")
-        upvote_button = gr.Button("🤗Upvote")
-
     with gr.Row():
         images_output = gr.Gallery(label="Retrieved Pages")
 
@@ -228,6 +255,18 @@ Our model is capable of:
 
     gr.Markdown("By using this demo, you agree to share your usage data with us for research purposes, to help improve the user experience.")
 
+    with gr.Row():
+        button = gr.Button("Answer Question with Retrieved Pages")
+
+    gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer")
+
+    button.click(fn=answer_question, inputs=[images_output, query_input], outputs=gen_model_response)
+
+    with gr.Row():
+        downvote_button = gr.Button("🤣Downvote")
+        upvote_button = gr.Button("🤗Upvote")
+
+
 
 app.launch()
 
 
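End to end, the new button passes the retrieved gallery and the query text to `answer_question`. A minimal sketch of that flow outside Gradio, assuming the pages arrive as PIL images (some Gradio versions hand back `(image, caption)` tuples or file paths from `gr.Gallery`, which would need unpacking first):

```python
# Minimal sketch of the new answer flow (hypothetical file names).
from PIL import Image

pages = [Image.open(p) for p in ('page_3.png', 'page_7.png')]
question = 'What does the table on these pages summarize?'

answer = answer_question(pages, question)  # defined in app.py above
print(answer)
```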