Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -50,7 +50,7 @@ layout_prompt = """Please output the layout information from the image, includin
|
|
50 |
ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
|
51 |
|
52 |
# --- Model Loading ---
|
53 |
-
MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-
|
54 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
55 |
model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
56 |
MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
|
@@ -77,6 +77,12 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
77 |
MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
|
78 |
).to(device).eval()
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
# --- Utility Functions ---
|
81 |
def layoutjson2md(layout_data: List[Dict]) -> str:
|
82 |
"""Converts the structured JSON from Layout Analysis into formatted Markdown."""
|
@@ -122,10 +128,11 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
|
|
122 |
text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
|
123 |
|
124 |
# 2. Select model and processor
|
125 |
-
if model_name == "Camel-Doc-OCR-
|
126 |
elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
|
127 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
128 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
|
|
129 |
else:
|
130 |
yield "Invalid model selected.", "Invalid model selected.", None
|
131 |
return
|
@@ -190,10 +197,11 @@ def create_gradio_interface():
|
|
190 |
# Left Column (Inputs)
|
191 |
with gr.Column(scale=1):
|
192 |
model_choice = gr.Dropdown(
|
193 |
-
choices=["Camel-Doc-OCR-
|
194 |
"MonkeyOCR-Recognition",
|
195 |
"Nanonets-OCR-s",
|
196 |
-
"Megalodon-OCR-Sync-0713"],
|
|
|
197 |
label="Select Model", value="Nanonets-OCR-s"
|
198 |
)
|
199 |
task_choice = gr.Dropdown(
|
|
|
50 |
ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
|
51 |
|
52 |
# --- Model Loading ---
|
53 |
+
MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-080125"
|
54 |
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
|
55 |
model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
56 |
MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
|
|
|
77 |
MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
|
78 |
).to(device).eval()
|
79 |
|
80 |
+
MODEL_ID_I = "ChatDOC/OCRFlux-3B"
|
81 |
+
processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
|
82 |
+
model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
83 |
+
MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
|
84 |
+
).to(device).eval()
|
85 |
+
|
86 |
# --- Utility Functions ---
|
87 |
def layoutjson2md(layout_data: List[Dict]) -> str:
|
88 |
"""Converts the structured JSON from Layout Analysis into formatted Markdown."""
|
|
|
128 |
text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
|
129 |
|
130 |
# 2. Select model and processor
|
131 |
+
if model_name == "Camel-Doc-OCR-080125": processor, model = processor_m, model_m
|
132 |
elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
|
133 |
elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
|
134 |
elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
|
135 |
+
elif model_name == "OCRFlux-3B": processor, model = processor_i, model_i
|
136 |
else:
|
137 |
yield "Invalid model selected.", "Invalid model selected.", None
|
138 |
return
|
|
|
197 |
# Left Column (Inputs)
|
198 |
with gr.Column(scale=1):
|
199 |
model_choice = gr.Dropdown(
|
200 |
+
choices=["Camel-Doc-OCR-080125",
|
201 |
"MonkeyOCR-Recognition",
|
202 |
"Nanonets-OCR-s",
|
203 |
+
"Megalodon-OCR-Sync-0713",
|
204 |
+
"OCRFlux-3B"],
|
205 |
label="Select Model", value="Nanonets-OCR-s"
|
206 |
)
|
207 |
task_choice = gr.Dropdown(
|