prithivMLmods committed on
Commit
89e2ec5
·
verified ·
1 Parent(s): df3fc87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -9,6 +9,7 @@ from PIL import Image
9
  import cv2
10
  from transformers import (
11
  Qwen2_5_VLForConditionalGeneration,
 
12
  AutoProcessor,
13
  TextIteratorStreamer,
14
  )
@@ -41,7 +42,7 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
41
  torch_dtype=torch.float16
42
  ).to(device).eval()
43
 
44
- # Load OCRFlux-3B
45
  MODEL_ID_T = "prithivMLmods/Megalodon-OCR-Sync-0713"
46
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
47
  model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
@@ -51,9 +52,9 @@ model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
51
  ).to(device).eval()
52
 
53
  # Load ShotVL-7B
54
- MODEL_ID_S = "Vchitect/ShotVL-7B"
55
  processor_s = AutoProcessor.from_pretrained(MODEL_ID_S, trust_remote_code=True)
56
- model_s = Qwen2_5_VLForConditionalGeneration.from_pretrained(
57
  MODEL_ID_S,
58
  trust_remote_code=True,
59
  torch_dtype=torch.float16
@@ -98,7 +99,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
98
  elif model_name == "Megalodon-OCR-Sync-0713":
99
  processor = processor_t
100
  model = model_t
101
- elif model_name == "ShotVL-7B":
102
  processor = processor_s
103
  model = model_s
104
  else:
@@ -154,7 +155,7 @@ def generate_video(model_name: str, text: str, video_path: str,
154
  elif model_name == "Megalodon-OCR-Sync-0713":
155
  processor = processor_t
156
  model = model_t
157
- elif model_name == "ShotVL-7B":
158
  processor = processor_s
159
  model = model_s
160
  else:
@@ -271,12 +272,28 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
271
  markdown_output = gr.Markdown(label="(Result.md)")
272
 
273
  model_choice = gr.Radio(
274
- choices=["Camel-Doc-OCR-062825", "MonkeyOCR-pro-1.2B", "Megalodon-OCR-Sync-0713", "ShotVL-7B"],
275
  label="Select Model",
276
  value="Camel-Doc-OCR-062825"
277
  )
278
 
279
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  if __name__ == "__main__":
282
  demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
 
9
  import cv2
10
  from transformers import (
11
  Qwen2_5_VLForConditionalGeneration,
12
+ Qwen2VLForConditionalGeneration,
13
  AutoProcessor,
14
  TextIteratorStreamer,
15
  )
 
42
  torch_dtype=torch.float16
43
  ).to(device).eval()
44
 
45
+ # Load Megalodon-OCR-Sync-0713
46
  MODEL_ID_T = "prithivMLmods/Megalodon-OCR-Sync-0713"
47
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
48
  model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 
52
  ).to(device).eval()
53
 
54
  # Load ShotVL-7B
55
+ MODEL_ID_S = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
56
  processor_s = AutoProcessor.from_pretrained(MODEL_ID_S, trust_remote_code=True)
57
+ model_s = Qwen2VLForConditionalGeneration.from_pretrained(
58
  MODEL_ID_S,
59
  trust_remote_code=True,
60
  torch_dtype=torch.float16
 
99
  elif model_name == "Megalodon-OCR-Sync-0713":
100
  processor = processor_t
101
  model = model_t
102
+ elif model_name == "Qwen2-VL-OCR-2B":
103
  processor = processor_s
104
  model = model_s
105
  else:
 
155
  elif model_name == "Megalodon-OCR-Sync-0713":
156
  processor = processor_t
157
  model = model_t
158
+ elif model_name == "Qwen2-VL-OCR-2B":
159
  processor = processor_s
160
  model = model_s
161
  else:
 
272
  markdown_output = gr.Markdown(label="(Result.md)")
273
 
274
  model_choice = gr.Radio(
275
+ choices=["Camel-Doc-OCR-062825", "MonkeyOCR-pro-1.2B", "Megalodon-OCR-Sync-0713", "Qwen2-VL-OCR-2B"],
276
  label="Select Model",
277
  value="Camel-Doc-OCR-062825"
278
  )
279
 
280
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
281
 
282
+ # Define the submit button actions
283
+ image_submit.click(fn=generate_image,
284
+ inputs=[
285
+ model_choice, image_query, image_upload,
286
+ max_new_tokens, temperature, top_p, top_k,
287
+ repetition_penalty
288
+ ],
289
+ outputs=[output, markdown_output])
290
+ video_submit.click(fn=generate_video,
291
+ inputs=[
292
+ model_choice, video_query, video_upload,
293
+ max_new_tokens, temperature, top_p, top_k,
294
+ repetition_penalty
295
+ ],
296
+ outputs=[output, markdown_output])
297
+
298
  if __name__ == "__main__":
299
  demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)