prithivMLmods committed on
Commit
5b3dc18
·
verified ·
1 Parent(s): d7f9147

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -20,6 +20,7 @@ import fitz
20
 
21
  from transformers import (
22
  Qwen2_5_VLForConditionalGeneration,
 
23
  AutoModelForCausalLM,
24
  AutoModelForVision2Seq,
25
  AutoModelForImageTextToText,
@@ -78,16 +79,16 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
78
  MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
79
  ).to(device).eval()
80
 
81
- MODEL_ID_I = "microsoft/Florence-2-large"
82
  processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
83
- model_i = AutoModelForCausalLM.from_pretrained(
84
- MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16, _attn_implementation="flash_attention_2"
85
  ).to(device).eval()
86
 
87
- MODEL_ID_A = "microsoft/Florence-2-base"
88
  processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
89
- model_a = AutoModelForCausalLM.from_pretrained(
90
- MODEL_ID_A, trust_remote_code=True, torch_dtype=torch.float16, _attn_implementation="flash_attention_2"
91
  ).to(device).eval()
92
 
93
  # --- PDF Generation and Preview Utility Function ---
@@ -184,8 +185,8 @@ def process_document_stream(
184
  elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
185
  elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
186
  elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_g, model_g
187
- elif model_name == "Florence-2-large": processor, model = processor_i, model_i
188
- elif model_name == "Florence-2-base": processor, model = processor_a, model_a
189
  else:
190
  yield "Invalid model selected.", ""
191
  return
@@ -242,7 +243,7 @@ def create_gradio_interface():
242
  # Left Column (Inputs)
243
  with gr.Column(scale=1):
244
  model_choice = gr.Dropdown(
245
- choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "Florence-2-large", "MonkeyOCR-pro-1.2B", "Florence-2-base"],
246
  label="Select Model", value="LFM2-VL-1.6B"
247
  )
248
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
 
20
 
21
  from transformers import (
22
  Qwen2_5_VLForConditionalGeneration,
23
+ Blip2ForConditionalGeneration,
24
  AutoModelForCausalLM,
25
  AutoModelForVision2Seq,
26
  AutoModelForImageTextToText,
 
79
  MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
80
  ).to(device).eval()
81
 
82
+ MODEL_ID_I = "Salesforce/blip2-opt-2.7b"
83
  processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
84
+ model_i = Blip2ForConditionalGeneration.from_pretrained(
85
+ MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
86
  ).to(device).eval()
87
 
88
+ MODEL_ID_A = "facebook/Perception-LM-1B"
89
  processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
90
+ model_a = AutoModelForImageTextToText.from_pretrained(
91
+ MODEL_ID_A, trust_remote_code=True, torch_dtype=torch.float16
92
  ).to(device).eval()
93
 
94
  # --- PDF Generation and Preview Utility Function ---
 
185
  elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
186
  elif model_name == "SmolVLM-Instruct-250M": processor, model = processor_c, model_c
187
  elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_g, model_g
188
+ elif model_name == "blip2-opt-2.7b": processor, model = processor_i, model_i
189
+ elif model_name == "Perception-LM-1B": processor, model = processor_a, model_a
190
  else:
191
  yield "Invalid model selected.", ""
192
  return
 
243
  # Left Column (Inputs)
244
  with gr.Column(scale=1):
245
  model_choice = gr.Dropdown(
246
+ choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "SmolVLM-Instruct-250M", "blip2-opt-2.7b", "MonkeyOCR-pro-1.2B", "Perception-LM-1B"],
247
  label="Select Model", value="LFM2-VL-1.6B"
248
  )
249
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")