Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ import fitz
|
|
20 |
|
21 |
from transformers import (
|
22 |
Qwen2_5_VLForConditionalGeneration,
|
23 |
-
|
24 |
AutoModelForCausalLM,
|
25 |
AutoModelForVision2Seq,
|
26 |
AutoModelForImageTextToText,
|
@@ -79,18 +79,24 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
79 |
MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
|
80 |
).to(device).eval()
|
81 |
|
82 |
-
MODEL_ID_I = "
|
83 |
processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
|
84 |
-
model_i =
|
85 |
MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
|
86 |
).to(device).eval()
|
87 |
|
88 |
-
MODEL_ID_A = "
|
89 |
processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
|
90 |
-
model_a =
|
91 |
MODEL_ID_A, trust_remote_code=True, torch_dtype=torch.float16
|
92 |
).to(device).eval()
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
# --- PDF Generation and Preview Utility Function ---
|
95 |
def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: int, line_spacing: float, alignment: str, image_size: str):
|
96 |
"""
|
@@ -185,8 +191,9 @@ def process_document_stream(
|
|
185 |
elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
|
186 |
elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
|
187 |
elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_g, model_g
|
188 |
-
elif model_name == "
|
189 |
-
elif model_name == "
|
|
|
190 |
else:
|
191 |
yield "Invalid model selected.", ""
|
192 |
return
|
@@ -243,7 +250,8 @@ def create_gradio_interface():
|
|
243 |
# Left Column (Inputs)
|
244 |
with gr.Column(scale=1):
|
245 |
model_choice = gr.Dropdown(
|
246 |
-
choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "
|
|
|
247 |
label="Select Model", value="LFM2-VL-1.6B"
|
248 |
)
|
249 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
|
|
|
20 |
|
21 |
from transformers import (
|
22 |
Qwen2_5_VLForConditionalGeneration,
|
23 |
+
Qwen2VLForConditionalGeneration,
|
24 |
AutoModelForCausalLM,
|
25 |
AutoModelForVision2Seq,
|
26 |
AutoModelForImageTextToText,
|
|
|
79 |
MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
|
80 |
).to(device).eval()
|
81 |
|
82 |
+
MODEL_ID_I = "UCSC-VLAA/VLAA-Thinker-Qwen2VL-2B"
|
83 |
processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
|
84 |
+
model_i = Qwen2VLForConditionalGeneration.from_pretrained(
|
85 |
MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
|
86 |
).to(device).eval()
|
87 |
|
88 |
+
MODEL_ID_A = "nanonets/Nanonets-OCR-s"
|
89 |
processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
|
90 |
+
model_a = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
91 |
MODEL_ID_A, trust_remote_code=True, torch_dtype=torch.float16
|
92 |
).to(device).eval()
|
93 |
|
94 |
+
MODEL_ID_X = "prithivMLmods/Megalodon-OCR-Sync-0713"
|
95 |
+
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
|
96 |
+
model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
97 |
+
MODEL_ID_X, trust_remote_code=True, torch_dtype=torch.float16
|
98 |
+
).to(device).eval()
|
99 |
+
|
100 |
# --- PDF Generation and Preview Utility Function ---
|
101 |
def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: int, line_spacing: float, alignment: str, image_size: str):
|
102 |
"""
|
|
|
191 |
elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
|
192 |
elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
|
193 |
elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_g, model_g
|
194 |
+
elif model_name == "VLAA-Thinker-Qwen2VL-2B": processor, model = processor_i, model_i
|
195 |
+
elif model_name == "Nanonets-OCR-s": processor, model = processor_a, model_a
|
196 |
+
elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_x, model_x
|
197 |
else:
|
198 |
yield "Invalid model selected.", ""
|
199 |
return
|
|
|
250 |
# Left Column (Inputs)
|
251 |
with gr.Column(scale=1):
|
252 |
model_choice = gr.Dropdown(
|
253 |
+
choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "VLAA-Thinker-Qwen2VL-2B", "MonkeyOCR-pro-1.2B",
|
254 |
+
"Nanonets-OCR-s", "Megalodon-OCR-Sync-0713"],
|
255 |
label="Select Model", value="LFM2-VL-1.6B"
|
256 |
)
|
257 |
prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
|