prithivMLmods committed on
Commit
34f3cc6
·
verified ·
1 Parent(s): 2c3a91d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -19,6 +19,7 @@ import fitz
19
 
20
  from transformers import (
21
  Qwen2_5_VLForConditionalGeneration,
 
22
  AutoModelForImageTextToText,
23
  AutoProcessor,
24
  TextIteratorStreamer,
@@ -59,10 +60,10 @@ model_t = AutoModelForImageTextToText.from_pretrained(
59
  MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
60
  ).to(device).eval()
61
 
62
- MODEL_ID_C = "nanonets/Nanonets-OCR-s"
63
  processor_c = AutoProcessor.from_pretrained(MODEL_ID_C, trust_remote_code=True)
64
- model_c = Qwen2_5_VLForConditionalGeneration.from_pretrained(
65
- MODEL_ID_C, trust_remote_code=True, torch_dtype=torch.float16
66
  ).to(device).eval()
67
 
68
  MODEL_ID_G = "echo840/MonkeyOCR"
@@ -173,7 +174,7 @@ def process_document_stream(
173
 
174
  if model_name == "LFM2-VL-450M": processor, model = processor_m, model_m
175
  elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
176
- elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
177
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
178
  elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
179
  else:
@@ -232,7 +233,7 @@ def create_gradio_interface():
232
  # Left Column (Inputs)
233
  with gr.Column(scale=1):
234
  model_choice = gr.Dropdown(
235
- choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "olmOCR-7B-0725", "Nanonets-OCR-s", "Megalodon-OCR-Sync-0713"],
236
  label="Select Model", value="Nanonets-OCR-s"
237
  )
238
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
 
19
 
20
  from transformers import (
21
  Qwen2_5_VLForConditionalGeneration,
22
+ AutoModelForVision2Seq,
23
  AutoModelForImageTextToText,
24
  AutoProcessor,
25
  TextIteratorStreamer,
 
60
  MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
61
  ).to(device).eval()
62
 
63
+ MODEL_ID_C = "HuggingFaceTB/SmolVLM-Instruct-250M"
64
  processor_c = AutoProcessor.from_pretrained(MODEL_ID_C, trust_remote_code=True)
65
+ model_c = AutoModelForVision2Seq.from_pretrained(
66
+ MODEL_ID_C, trust_remote_code=True, torch_dtype=torch.float16, _attn_implementation="flash_attention_2"
67
  ).to(device).eval()
68
 
69
  MODEL_ID_G = "echo840/MonkeyOCR"
 
174
 
175
  if model_name == "LFM2-VL-450M": processor, model = processor_m, model_m
176
  elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
177
+ elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
178
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
179
  elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
180
  else:
 
233
  # Left Column (Inputs)
234
  with gr.Column(scale=1):
235
  model_choice = gr.Dropdown(
236
+ choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "olmOCR-7B-0725", "Megalodon-OCR-Sync-0713"],
237
  label="Select Model", value="Nanonets-OCR-s"
238
  )
239
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")