Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -19,10 +19,13 @@ import fitz
|
|
19 |
|
20 |
from transformers import (
|
21 |
Qwen2_5_VLForConditionalGeneration,
|
|
|
22 |
AutoProcessor,
|
23 |
TextIteratorStreamer,
|
24 |
)
|
25 |
|
|
|
|
|
26 |
from reportlab.lib.pagesizes import A4
|
27 |
from reportlab.lib.styles import getSampleStyleSheet
|
28 |
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
|
@@ -44,15 +47,15 @@ if torch.cuda.is_available():
|
|
44 |
print("Using device:", device)
|
45 |
|
46 |
# --- Model Loading ---
|
47 |
-
MODEL_ID_M = "
|
48 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
49 |
-
model_m =
|
50 |
MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
|
51 |
).to(device).eval()
|
52 |
|
53 |
-
MODEL_ID_T = "
|
54 |
processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
|
55 |
-
model_t =
|
56 |
MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
|
57 |
).to(device).eval()
|
58 |
|
@@ -168,8 +171,8 @@ def process_document_stream(
|
|
168 |
yield "Please enter a prompt.", ""
|
169 |
return
|
170 |
|
171 |
-
if model_name == "
|
172 |
-
elif model_name == "
|
173 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
174 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
175 |
elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
|
@@ -229,7 +232,7 @@ def create_gradio_interface():
|
|
229 |
# Left Column (Inputs)
|
230 |
with gr.Column(scale=1):
|
231 |
model_choice = gr.Dropdown(
|
232 |
-
choices=["
|
233 |
label="Select Model", value="Nanonets-OCR-s"
|
234 |
)
|
235 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
|
|
|
19 |
|
20 |
from transformers import (
|
21 |
Qwen2_5_VLForConditionalGeneration,
|
22 |
+
AutoModelForImageTextToText,
|
23 |
AutoProcessor,
|
24 |
TextIteratorStreamer,
|
25 |
)
|
26 |
|
27 |
+
from transformers.image_utils import load_image
|
28 |
+
|
29 |
from reportlab.lib.pagesizes import A4
|
30 |
from reportlab.lib.styles import getSampleStyleSheet
|
31 |
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
|
|
|
47 |
print("Using device:", device)
|
48 |
|
49 |
# --- Model Loading ---
|
50 |
+
MODEL_ID_M = "LiquidAI/LFM2-VL-450M"
|
51 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
52 |
+
model_m = AutoModelForImageTextToText.from_pretrained(
|
53 |
MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
|
54 |
).to(device).eval()
|
55 |
|
56 |
+
MODEL_ID_T = "LiquidAI/LFM2-VL-1.6B"
|
57 |
processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
|
58 |
+
model_t = AutoModelForImageTextToText.from_pretrained(
|
59 |
MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
|
60 |
).to(device).eval()
|
61 |
|
|
|
171 |
yield "Please enter a prompt.", ""
|
172 |
return
|
173 |
|
174 |
+
if model_name == "LFM2-VL-450M": processor, model = processor_m, model_m
|
175 |
+
elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
|
176 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
177 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
178 |
elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
|
|
|
232 |
# Left Column (Inputs)
|
233 |
with gr.Column(scale=1):
|
234 |
model_choice = gr.Dropdown(
|
235 |
+
choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "olmOCR-7B-0725", "Nanonets-OCR-s", "Megalodon-OCR-Sync-0713"],
|
236 |
label="Select Model", value="Nanonets-OCR-s"
|
237 |
)
|
238 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
|