Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -24,6 +24,7 @@ import torch
|
|
24 |
import os
|
25 |
import numpy as np
|
26 |
import json
|
|
|
27 |
|
28 |
cache_dir = '/data/kb_cache'
|
29 |
os.makedirs(cache_dir, exist_ok=True)
|
@@ -43,7 +44,8 @@ def calculate_md5_from_binary(binary_data):
|
|
43 |
@spaces.GPU(duration=100)
|
44 |
def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
|
45 |
global model, tokenizer
|
46 |
-
|
|
|
47 |
knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
|
48 |
|
49 |
this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
|
@@ -88,6 +90,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
|
|
88 |
def retrieve_gradio(knowledge_base: str, query: str, topk: int):
|
89 |
global model, tokenizer
|
90 |
|
|
|
|
|
91 |
target_cache_dir = os.path.join(cache_dir, knowledge_base)
|
92 |
|
93 |
if not os.path.exists(target_cache_dir):
|
@@ -180,9 +184,36 @@ device = 'cuda'
|
|
180 |
model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
|
181 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
182 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
|
|
|
183 |
model.to(device)
|
184 |
|
185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
with gr.Blocks() as app:
|
187 |
gr.Markdown("# Memex: OCR-free Visual Document Embedding Model as Your Personal Librarian")
|
188 |
gr.Markdown("""The model only takes images as document-side inputs and produce vectors representing document pages. Memex is trained with over 200k query-visual document pairs, including textual document, visual document, arxiv figures, plots, charts, industry documents, textbooks, ebooks, and openly-available PDFs, etc. Its performance is on a par with our ablation text embedding model on text-oriented documents, and an advantages on visually-intensive documents.
|
@@ -214,10 +245,6 @@ Our model is capable of:
|
|
214 |
topk_input = inputs=gr.Number(value=3, minimum=1, maximum=5, step=1, label="Number of pages to retrieve")
|
215 |
retrieve_button = gr.Button("Retrieve")
|
216 |
|
217 |
-
with gr.Row():
|
218 |
-
downvote_button = gr.Button("🤣Downvote")
|
219 |
-
upvote_button = gr.Button("🤗Upvote")
|
220 |
-
|
221 |
with gr.Row():
|
222 |
images_output = gr.Gallery(label="Retrieved Pages")
|
223 |
|
@@ -228,6 +255,18 @@ Our model is capable of:
|
|
228 |
|
229 |
gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
|
230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
app.launch()
|
233 |
|
|
|
24 |
import os
|
25 |
import numpy as np
|
26 |
import json
|
27 |
+
# The io module exposes BytesIO (in-memory binary stream); it has no name
# `Bytes`, so the previous spelling raised ImportError at module load.
from io import BytesIO
|
28 |
|
29 |
cache_dir = '/data/kb_cache'
|
30 |
os.makedirs(cache_dir, exist_ok=True)
|
|
|
44 |
@spaces.GPU(duration=100)
|
45 |
def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
|
46 |
global model, tokenizer
|
47 |
+
model.eval()
|
48 |
+
|
49 |
knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
|
50 |
|
51 |
this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
|
|
|
90 |
def retrieve_gradio(knowledge_base: str, query: str, topk: int):
|
91 |
global model, tokenizer
|
92 |
|
93 |
+
model.eval()
|
94 |
+
|
95 |
target_cache_dir = os.path.join(cache_dir, knowledge_base)
|
96 |
|
97 |
if not os.path.exists(target_cache_dir):
|
|
|
184 |
model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
|
185 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
186 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
|
187 |
+
model.eval()
|
188 |
model.to(device)
|
189 |
|
190 |
|
191 |
+
def answer_question(images, question):
    """Answer ``question`` about ``images`` with the MiniCPM-V-2_6 chat model.

    On every call this loads the generation tokenizer and model from
    ``gen_model_path``, puts the model in eval mode, moves it to the
    module-level ``device``, and sends all images plus the question as a
    single user turn to ``gen_model.chat``.

    Args:
        images: Iterable of image objects exposing ``convert('RGB')``
            (presumably PIL images coming from the retrieved-pages gallery --
            TODO confirm against the Gradio version in use). Items given as
            ``(image, caption)`` pairs are accepted; only the image is used.
            ``None`` is treated as "no images".
        question: The user's question, passed to the model unchanged.

    Returns:
        Whatever ``gen_model.chat`` returns; it is also printed to stdout.
    """
    print("model load begin...")
    gen_model_path = 'openbmb/MiniCPM-V-2_6'
    # Load from gen_model_path. The module-level ``model_path`` names the
    # embedding (retrieval) checkpoint; using it here loaded the wrong model
    # and left gen_model_path unused.
    gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
    gen_model = AutoModel.from_pretrained(
        gen_model_path,
        trust_remote_code=True,
        attn_implementation='sdpa',
        torch_dtype=torch.bfloat16,
    )
    gen_model.eval()
    gen_model.to(device)
    # NOTE(review): the model is reloaded on every call; confirm whether that
    # is intentional for this deployment before caching it.

    print("model load success!")

    # Gallery inputs may arrive as (image, caption) pairs; keep only the image.
    images_ = [
        (image[0] if isinstance(image, (tuple, list)) else image).convert('RGB')
        for image in (images or [])
    ]

    # One user turn: all page images first, then the question text.
    msgs = [{'role': 'user', 'content': [*images_, question]}]

    answer = gen_model.chat(
        image=None,
        msgs=msgs,
        tokenizer=gen_tokenizer
    )

    print(answer)

    return answer
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
with gr.Blocks() as app:
|
218 |
gr.Markdown("# Memex: OCR-free Visual Document Embedding Model as Your Personal Librarian")
|
219 |
gr.Markdown("""The model only takes images as document-side inputs and produce vectors representing document pages. Memex is trained with over 200k query-visual document pairs, including textual document, visual document, arxiv figures, plots, charts, industry documents, textbooks, ebooks, and openly-available PDFs, etc. Its performance is on a par with our ablation text embedding model on text-oriented documents, and an advantages on visually-intensive documents.
|
|
|
245 |
topk_input = inputs=gr.Number(value=3, minimum=1, maximum=5, step=1, label="Number of pages to retrieve")
|
246 |
retrieve_button = gr.Button("Retrieve")
|
247 |
|
|
|
|
|
|
|
|
|
248 |
with gr.Row():
|
249 |
images_output = gr.Gallery(label="Retrieved Pages")
|
250 |
|
|
|
255 |
|
256 |
gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
|
257 |
|
258 |
+
with gr.Row():
|
259 |
+
button = gr.Button("Answer Question with Retrieved Pages")
|
260 |
+
|
261 |
+
gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer")
|
262 |
+
|
263 |
+
button.click(fn=answer_question, inputs=[images_output, query_input], outputs=gen_model_response)
|
264 |
+
|
265 |
+
with gr.Row():
|
266 |
+
downvote_button = gr.Button("🤣Downvote")
|
267 |
+
upvote_button = gr.Button("🤗Upvote")
|
268 |
+
|
269 |
+
|
270 |
|
271 |
app.launch()
|
272 |
|