Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -13,12 +13,18 @@ import gradio as gr
|
|
13 |
import requests
|
14 |
import torch
|
15 |
from PIL import Image
|
|
|
16 |
from transformers import (
|
|
|
17 |
Qwen2_5_VLForConditionalGeneration,
|
|
|
18 |
AutoProcessor,
|
19 |
TextIteratorStreamer,
|
|
|
|
|
20 |
)
|
21 |
|
|
|
22 |
js_func = """
|
23 |
function refresh() {
|
24 |
const url = new URL(window.location);
|
@@ -83,6 +89,24 @@ model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
83 |
MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
|
84 |
).to(device).eval()
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
# --- Utility Functions ---
|
87 |
def layoutjson2md(layout_data: List[Dict]) -> str:
|
88 |
"""Converts the structured JSON from Layout Analysis into formatted Markdown."""
|
@@ -133,6 +157,7 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
|
|
133 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
134 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
135 |
elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
|
|
|
136 |
else:
|
137 |
yield "Invalid model selected.", "Invalid model selected.", None
|
138 |
return
|
@@ -201,6 +226,7 @@ def create_gradio_interface():
|
|
201 |
"MonkeyOCR-Recognition",
|
202 |
"olmOCR-7B-0725",
|
203 |
"Nanonets-OCR-s",
|
|
|
204 |
"Megalodon-OCR-Sync-0713"
|
205 |
],
|
206 |
label="Select Model", value="Nanonets-OCR-s"
|
|
|
13 |
import requests
|
14 |
import torch
|
15 |
from PIL import Image
|
16 |
+
|
17 |
from transformers import (
|
18 |
+
Qwen2VLForConditionalGeneration,
|
19 |
Qwen2_5_VLForConditionalGeneration,
|
20 |
+
AutoModelForImageTextToText,
|
21 |
AutoProcessor,
|
22 |
TextIteratorStreamer,
|
23 |
+
AutoModel,
|
24 |
+
AutoTokenizer,
|
25 |
)
|
26 |
|
27 |
+
# --- Activate Forced Dark Mode ---
|
28 |
js_func = """
|
29 |
function refresh() {
|
30 |
const url = new URL(window.location);
|
|
|
89 |
MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
|
90 |
).to(device).eval()
|
91 |
|
92 |
+
#-----------------------------------------#
|
93 |
+
|
94 |
+
# Load MonkeyOCR-pro-1.2B (weights live in the "Recognition" subfolder of the repo)
|
95 |
+
MODEL_ID_J = "echo840/MonkeyOCR-pro-1.2B"
|
96 |
+
SUBFOLDER = "Recognition"
|
97 |
+
processor_j = AutoProcessor.from_pretrained(
|
98 |
+
MODEL_ID_J,
|
99 |
+
trust_remote_code=True,
|
100 |
+
subfolder=SUBFOLDER
|
101 |
+
)
|
102 |
+
model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
103 |
+
MODEL_ID_J,
|
104 |
+
trust_remote_code=True,
|
105 |
+
subfolder=SUBFOLDER,
|
106 |
+
torch_dtype=torch.float16
|
107 |
+
).to(device).eval()
|
108 |
+
#-----------------------------------------#
|
109 |
+
|
110 |
# --- Utility Functions ---
|
111 |
def layoutjson2md(layout_data: List[Dict]) -> str:
|
112 |
"""Converts the structured JSON from Layout Analysis into formatted Markdown."""
|
|
|
157 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
158 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
159 |
elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
|
160 |
+
elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_j, model_j
|
161 |
else:
|
162 |
yield "Invalid model selected.", "Invalid model selected.", None
|
163 |
return
|
|
|
226 |
"MonkeyOCR-Recognition",
|
227 |
"olmOCR-7B-0725",
|
228 |
"Nanonets-OCR-s",
|
229 |
+
"MonkeyOCR-pro-1.2B",
|
230 |
"Megalodon-OCR-Sync-0713"
|
231 |
],
|
232 |
label="Select Model", value="Nanonets-OCR-s"
|