prithivMLmods committed on
Commit
70a54c4
·
verified ·
1 Parent(s): 154496b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -116,6 +116,16 @@ moondream = AutoModelForCausalLM.from_pretrained(
116
  )
117
  tokenizer_md = AutoTokenizer.from_pretrained(MODEL_ID_MD, revision=REVISION_MD)
118
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  # --- PDF Generation and Preview Utility Function ---
121
  def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: int, line_spacing: float, alignment: str, image_size: str):
@@ -217,7 +227,8 @@ def process_document_stream(
217
  yield answer, answer
218
  return
219
 
220
- if model_name == "LFM2-VL-450M(fast)": processor, model = processor_m, model_m
 
221
  elif model_name == "LFM2-VL-1.6B(fast)": processor, model = processor_t, model_t
222
  elif model_name == "ShotVL-3B(cinematic)": processor, model = processor_z, model_z
223
  elif model_name == "SmolVLM-Instruct-250M(smol)": processor, model = processor_c, model_c
@@ -281,9 +292,9 @@ def create_gradio_interface():
281
  # Left Column (Inputs)
282
  with gr.Column(scale=1):
283
  model_choice = gr.Dropdown(
284
- choices=["LFM2-VL-450M(fast)", "LFM2-VL-1.6B(fast)", "SmolVLM-Instruct-250M(smol)", "Moondream2(vision)", "ShotVL-3B(cinematic)", "Megalodon-OCR-Sync-0713(ocr)",
285
  "VLAA-Thinker-Qwen2VL-2B(reason)", "MonkeyOCR-pro-1.2B(ocr)", "Nanonets-OCR-s(ocr)"],
286
- label="Select Model", value= "LFM2-VL-450M(fast)"
287
  )
288
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query", value="Describe the image!")
289
  image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
 
116
  )
117
  tokenizer_md = AutoTokenizer.from_pretrained(MODEL_ID_MD, revision=REVISION_MD)
118
 
119
+ # --- SmolVLM2 Model Loading ---
120
+ MODEL_ID_S2 = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
121
+ processor_s2 = AutoProcessor.from_pretrained(MODEL_ID_S2, trust_remote_code=True)
122
+ model_s2 = AutoModelForImageTextToText.from_pretrained(
123
+ MODEL_ID_S2,
124
+ trust_remote_code=True,
125
+ _attn_implementation="flash_attention_2",
126
+ torch_dtype=torch.float16
127
+ ).to(device).eval()
128
+
129
 
130
  # --- PDF Generation and Preview Utility Function ---
131
  def generate_and_preview_pdf(image: Image.Image, text_content: str, font_size: int, line_spacing: float, alignment: str, image_size: str):
 
227
  yield answer, answer
228
  return
229
 
230
+ if model_name == "SmolVLM2-2.2B-Instruct(smol)": processor, model = processor_s2, model_s2
231
+ elif model_name == "LFM2-VL-450M(fast)": processor, model = processor_m, model_m
232
  elif model_name == "LFM2-VL-1.6B(fast)": processor, model = processor_t, model_t
233
  elif model_name == "ShotVL-3B(cinematic)": processor, model = processor_z, model_z
234
  elif model_name == "SmolVLM-Instruct-250M(smol)": processor, model = processor_c, model_c
 
292
  # Left Column (Inputs)
293
  with gr.Column(scale=1):
294
  model_choice = gr.Dropdown(
295
+ choices=["SmolVLM2-2.2B-Instruct(smol)", "LFM2-VL-450M(fast)", "LFM2-VL-1.6B(fast)", "SmolVLM-Instruct-250M(smol)", "Moondream2(vision)", "ShotVL-3B(cinematic)", "Megalodon-OCR-Sync-0713(ocr)",
296
  "VLAA-Thinker-Qwen2VL-2B(reason)", "MonkeyOCR-pro-1.2B(ocr)", "Nanonets-OCR-s(ocr)"],
297
+ label="Select Model", value= "SmolVLM2-2.2B-Instruct(smol)"
298
  )
299
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query", value="Describe the image!")
300
  image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])