prithivMLmods committed on
Commit
260ceda
·
verified ·
1 Parent(s): 79ad69c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py CHANGED
@@ -13,12 +13,18 @@ import gradio as gr
13
  import requests
14
  import torch
15
  from PIL import Image
 
16
  from transformers import (
 
17
  Qwen2_5_VLForConditionalGeneration,
 
18
  AutoProcessor,
19
  TextIteratorStreamer,
 
 
20
  )
21
 
 
22
  js_func = """
23
  function refresh() {
24
  const url = new URL(window.location);
@@ -83,6 +89,24 @@ model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
83
  MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
84
  ).to(device).eval()
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # --- Utility Functions ---
87
  def layoutjson2md(layout_data: List[Dict]) -> str:
88
  """Converts the structured JSON from Layout Analysis into formatted Markdown."""
@@ -133,6 +157,7 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
133
  elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
134
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
135
  elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
 
136
  else:
137
  yield "Invalid model selected.", "Invalid model selected.", None
138
  return
@@ -201,6 +226,7 @@ def create_gradio_interface():
201
  "MonkeyOCR-Recognition",
202
  "olmOCR-7B-0725",
203
  "Nanonets-OCR-s",
 
204
  "Megalodon-OCR-Sync-0713"
205
  ],
206
  label="Select Model", value="Nanonets-OCR-s"
 
13
  import requests
14
  import torch
15
  from PIL import Image
16
+
17
  from transformers import (
18
+ Qwen2VLForConditionalGeneration,
19
  Qwen2_5_VLForConditionalGeneration,
20
+ AutoModelForImageTextToText,
21
  AutoProcessor,
22
  TextIteratorStreamer,
23
+ AutoModel,
24
+ AutoTokenizer,
25
  )
26
 
27
+ # --- Activate Forced Dark Mode ---
28
  js_func = """
29
  function refresh() {
30
  const url = new URL(window.location);
 
89
  MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
90
  ).to(device).eval()
91
 
92
+ #-----------------------------------------#
93
+
94
+ # Load MonkeyOCR
95
+ MODEL_ID_J = "echo840/MonkeyOCR-pro-1.2B"
96
+ SUBFOLDER = "Recognition"
97
+ processor_j = AutoProcessor.from_pretrained(
98
+ MODEL_ID_J,
99
+ trust_remote_code=True,
100
+ subfolder=SUBFOLDER
101
+ )
102
+ model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
103
+ MODEL_ID_J,
104
+ trust_remote_code=True,
105
+ subfolder=SUBFOLDER,
106
+ torch_dtype=torch.float16
107
+ ).to(device).eval()
108
+ #-----------------------------------------#
109
+
110
  # --- Utility Functions ---
111
  def layoutjson2md(layout_data: List[Dict]) -> str:
112
  """Converts the structured JSON from Layout Analysis into formatted Markdown."""
 
157
  elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
158
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
159
  elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
160
+ elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_j, model_j
161
  else:
162
  yield "Invalid model selected.", "Invalid model selected.", None
163
  return
 
226
  "MonkeyOCR-Recognition",
227
  "olmOCR-7B-0725",
228
  "Nanonets-OCR-s",
229
+ "MonkeyOCR-pro-1.2B",
230
  "Megalodon-OCR-Sync-0713"
231
  ],
232
  label="Select Model", value="Nanonets-OCR-s"