prithivMLmods committed on
Commit
18c8c76
·
verified ·
1 Parent(s): 4d0a926

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -50,7 +50,7 @@ layout_prompt = """Please output the layout information from the image, includin
50
  ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
51
 
52
  # --- Model Loading ---
53
- MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-062825"
54
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
55
  model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
56
  MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
@@ -77,6 +77,12 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
77
  MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
78
  ).to(device).eval()
79
 
 
 
 
 
 
 
80
  # --- Utility Functions ---
81
  def layoutjson2md(layout_data: List[Dict]) -> str:
82
  """Converts the structured JSON from Layout Analysis into formatted Markdown."""
@@ -122,10 +128,11 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
122
  text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
123
 
124
  # 2. Select model and processor
125
- if model_name == "Camel-Doc-OCR-062825": processor, model = processor_m, model_m
126
  elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
127
  elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
128
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
 
129
  else:
130
  yield "Invalid model selected.", "Invalid model selected.", None
131
  return
@@ -190,10 +197,11 @@ def create_gradio_interface():
190
  # Left Column (Inputs)
191
  with gr.Column(scale=1):
192
  model_choice = gr.Dropdown(
193
- choices=["Camel-Doc-OCR-062825",
194
  "MonkeyOCR-Recognition",
195
  "Nanonets-OCR-s",
196
- "Megalodon-OCR-Sync-0713"],
 
197
  label="Select Model", value="Nanonets-OCR-s"
198
  )
199
  task_choice = gr.Dropdown(
 
50
  ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
51
 
52
  # --- Model Loading ---
53
+ MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-080125"
54
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
55
  model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
56
  MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
 
77
  MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
78
  ).to(device).eval()
79
 
80
+ MODEL_ID_I = "ChatDOC/OCRFlux-3B"
81
+ processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
82
+ model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
83
+ MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
84
+ ).to(device).eval()
85
+
86
  # --- Utility Functions ---
87
  def layoutjson2md(layout_data: List[Dict]) -> str:
88
  """Converts the structured JSON from Layout Analysis into formatted Markdown."""
 
128
  text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
129
 
130
  # 2. Select model and processor
131
+ if model_name == "Camel-Doc-OCR-080125": processor, model = processor_m, model_m
132
  elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
133
  elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
134
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
135
+ elif model_name == "OCRFlux-3B": processor, model = processor_i, model_i
136
  else:
137
  yield "Invalid model selected.", "Invalid model selected.", None
138
  return
 
197
  # Left Column (Inputs)
198
  with gr.Column(scale=1):
199
  model_choice = gr.Dropdown(
200
+ choices=["Camel-Doc-OCR-080125",
201
  "MonkeyOCR-Recognition",
202
  "Nanonets-OCR-s",
203
+ "Megalodon-OCR-Sync-0713",
204
+ "OCRFlux-3B"],
205
  label="Select Model", value="Nanonets-OCR-s"
206
  )
207
  task_choice = gr.Dropdown(