prithivMLmods committed on
Commit
e5b26a3
·
verified ·
1 Parent(s): 5b3dc18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -20,7 +20,7 @@ import fitz
20
 
21
  from transformers import (
22
  Qwen2_5_VLForConditionalGeneration,
23
- Blip2ForConditionalGeneration,
24
  AutoModelForCausalLM,
25
  AutoModelForVision2Seq,
26
  AutoModelForImageTextToText,
@@ -79,18 +79,24 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
79
  MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
80
  ).to(device).eval()
81
 
82
- MODEL_ID_I = "Salesforce/blip2-opt-2.7b"
83
  processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
84
- model_i = Blip2ForConditionalGeneration.from_pretrained(
85
  MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
86
  ).to(device).eval()
87
 
88
- MODEL_ID_A = "facebook/Perception-LM-1B"
89
  processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
90
- model_a = AutoModelForImageTextToText.from_pretrained(
91
  MODEL_ID_A, trust_remote_code=True, torch_dtype=torch.float16
92
  ).to(device).eval()
93
 
 
 
 
 
 
 
94
  # --- PDF Generation and Preview Utility Function ---
95
  def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: int, line_spacing: float, alignment: str, image_size: str):
96
  """
@@ -185,8 +191,9 @@ def process_document_stream(
185
  elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
186
  elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
187
  elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_g, model_g
188
- elif model_name == "blip2-opt-2.7b": processor, model = processor_i, model_i
189
- elif model_name == "Perception-LM-1B": processor, model = processor_a, model_a
 
190
  else:
191
  yield "Invalid model selected.", ""
192
  return
@@ -243,7 +250,8 @@ def create_gradio_interface():
243
  # Left Column (Inputs)
244
  with gr.Column(scale=1):
245
  model_choice = gr.Dropdown(
246
- choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "blip2-opt-2.7b", "MonkeyOCR-pro-1.2B", "Perception-LM-1B"],
 
247
  label="Select Model", value="LFM2-VL-1.6B"
248
  )
249
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
 
20
 
21
  from transformers import (
22
  Qwen2_5_VLForConditionalGeneration,
23
+ Qwen2VLForConditionalGeneration,
24
  AutoModelForCausalLM,
25
  AutoModelForVision2Seq,
26
  AutoModelForImageTextToText,
 
79
  MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
80
  ).to(device).eval()
81
 
82
+ MODEL_ID_I = "UCSC-VLAA/VLAA-Thinker-Qwen2VL-2B"
83
  processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
84
+ model_i = Qwen2VLForConditionalGeneration.from_pretrained(
85
  MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
86
  ).to(device).eval()
87
 
88
+ MODEL_ID_A = "nanonets/Nanonets-OCR-s"
89
  processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
90
+ model_a = Qwen2_5_VLForConditionalGeneration.from_pretrained(
91
  MODEL_ID_A, trust_remote_code=True, torch_dtype=torch.float16
92
  ).to(device).eval()
93
 
94
+ MODEL_ID_X = "prithivMLmods/Megalodon-OCR-Sync-0713"
95
+ processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
96
+ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
97
+ MODEL_ID_X, trust_remote_code=True, torch_dtype=torch.float16
98
+ ).to(device).eval()
99
+
100
  # --- PDF Generation and Preview Utility Function ---
101
  def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: int, line_spacing: float, alignment: str, image_size: str):
102
  """
 
191
  elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
192
  elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
193
  elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_g, model_g
194
+ elif model_name == "VLAA-Thinker-Qwen2VL-2B": processor, model = processor_i, model_i
195
+ elif model_name == "Nanonets-OCR-s": processor, model = processor_a, model_a
196
+ elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_x, model_x
197
  else:
198
  yield "Invalid model selected.", ""
199
  return
 
250
  # Left Column (Inputs)
251
  with gr.Column(scale=1):
252
  model_choice = gr.Dropdown(
253
+ choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "VLAA-Thinker-Qwen2VL-2B", "MonkeyOCR-pro-1.2B",
254
+ "Nanonets-OCR-s", "Megalodon-OCR-Sync-0713"],
255
  label="Select Model", value="LFM2-VL-1.6B"
256
  )
257
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")