Spaces: Restarting on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -19,6 +19,7 @@ import fitz
|
|
19 |
|
20 |
from transformers import (
|
21 |
Qwen2_5_VLForConditionalGeneration,
|
|
|
22 |
AutoModelForImageTextToText,
|
23 |
AutoProcessor,
|
24 |
TextIteratorStreamer,
|
@@ -59,10 +60,10 @@ model_t = AutoModelForImageTextToText.from_pretrained(
|
|
59 |
MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
|
60 |
).to(device).eval()
|
61 |
|
62 |
-
MODEL_ID_C = "
|
63 |
processor_c = AutoProcessor.from_pretrained(MODEL_ID_C, trust_remote_code=True)
|
64 |
-
model_c =
|
65 |
-
MODEL_ID_C, trust_remote_code=True, torch_dtype=torch.float16
|
66 |
).to(device).eval()
|
67 |
|
68 |
MODEL_ID_G = "echo840/MonkeyOCR"
|
@@ -173,7 +174,7 @@ def process_document_stream(
|
|
173 |
|
174 |
if model_name == "LFM2-VL-450M": processor, model = processor_m, model_m
|
175 |
elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
|
176 |
-
elif model_name == "
|
177 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
178 |
elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
|
179 |
else:
|
@@ -232,7 +233,7 @@ def create_gradio_interface():
|
|
232 |
# Left Column (Inputs)
|
233 |
with gr.Column(scale=1):
|
234 |
model_choice = gr.Dropdown(
|
235 |
-
choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "
|
236 |
label="Select Model", value="Nanonets-OCR-s"
|
237 |
)
|
238 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
|
|
|
19 |
|
20 |
from transformers import (
|
21 |
Qwen2_5_VLForConditionalGeneration,
|
22 |
+
AutoModelForVision2Seq,
|
23 |
AutoModelForImageTextToText,
|
24 |
AutoProcessor,
|
25 |
TextIteratorStreamer,
|
|
|
60 |
MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
|
61 |
).to(device).eval()
|
62 |
|
63 |
+
MODEL_ID_C = "HuggingFaceTB/SmolVLM-Instruct-250M"
|
64 |
processor_c = AutoProcessor.from_pretrained(MODEL_ID_C, trust_remote_code=True)
|
65 |
+
model_c = AutoModelForVision2Seq.from_pretrained(
|
66 |
+
MODEL_ID_C, trust_remote_code=True, torch_dtype=torch.float16, _attn_implementation="flash_attention_2"
|
67 |
).to(device).eval()
|
68 |
|
69 |
MODEL_ID_G = "echo840/MonkeyOCR"
|
|
|
174 |
|
175 |
if model_name == "LFM2-VL-450M": processor, model = processor_m, model_m
|
176 |
elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
|
177 |
+
elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
|
178 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
179 |
elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
|
180 |
else:
|
|
|
233 |
# Left Column (Inputs)
|
234 |
with gr.Column(scale=1):
|
235 |
model_choice = gr.Dropdown(
|
236 |
+
choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "olmOCR-7B-0725", "Megalodon-OCR-Sync-0713"],
|
237 |
label="Select Model", value="Nanonets-OCR-s"
|
238 |
)
|
239 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
|