prithivMLmods committed on
Commit
2c3a91d
·
verified ·
1 Parent(s): 90601a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -19,10 +19,13 @@ import fitz
19
 
20
  from transformers import (
21
  Qwen2_5_VLForConditionalGeneration,
 
22
  AutoProcessor,
23
  TextIteratorStreamer,
24
  )
25
 
 
 
26
  from reportlab.lib.pagesizes import A4
27
  from reportlab.lib.styles import getSampleStyleSheet
28
  from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
@@ -44,15 +47,15 @@ if torch.cuda.is_available():
44
  print("Using device:", device)
45
 
46
  # --- Model Loading ---
47
- MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-080125"
48
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
49
- model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
50
  MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
51
  ).to(device).eval()
52
 
53
- MODEL_ID_T = "prithivMLmods/Megalodon-OCR-Sync-0713"
54
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
55
- model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
56
  MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
57
  ).to(device).eval()
58
 
@@ -168,8 +171,8 @@ def process_document_stream(
168
  yield "Please enter a prompt.", ""
169
  return
170
 
171
- if model_name == "Camel-Doc-OCR-080125": processor, model = processor_m, model_m
172
- elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
173
  elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
174
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
175
  elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
@@ -229,7 +232,7 @@ def create_gradio_interface():
229
  # Left Column (Inputs)
230
  with gr.Column(scale=1):
231
  model_choice = gr.Dropdown(
232
- choices=["Camel-Doc-OCR-080125", "MonkeyOCR-Recognition", "olmOCR-7B-0725", "Nanonets-OCR-s", "Megalodon-OCR-Sync-0713"],
233
  label="Select Model", value="Nanonets-OCR-s"
234
  )
235
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")
 
19
 
20
  from transformers import (
21
  Qwen2_5_VLForConditionalGeneration,
22
+ AutoModelForImageTextToText,
23
  AutoProcessor,
24
  TextIteratorStreamer,
25
  )
26
 
27
+ from transformers.image_utils import load_image
28
+
29
  from reportlab.lib.pagesizes import A4
30
  from reportlab.lib.styles import getSampleStyleSheet
31
  from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
 
47
  print("Using device:", device)
48
 
49
  # --- Model Loading ---
50
+ MODEL_ID_M = "LiquidAI/LFM2-VL-450M"
51
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
52
+ model_m = AutoModelForImageTextToText.from_pretrained(
53
  MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
54
  ).to(device).eval()
55
 
56
+ MODEL_ID_T = "LiquidAI/LFM2-VL-1.6B"
57
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
58
+ model_t = AutoModelForImageTextToText.from_pretrained(
59
  MODEL_ID_T, trust_remote_code=True, torch_dtype=torch.float16
60
  ).to(device).eval()
61
 
 
171
  yield "Please enter a prompt.", ""
172
  return
173
 
174
+ if model_name == "LFM2-VL-450M": processor, model = processor_m, model_m
175
+ elif model_name == "LFM2-VL-1.6B": processor, model = processor_t, model_t
176
  elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
177
  elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
178
  elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
 
232
  # Left Column (Inputs)
233
  with gr.Column(scale=1):
234
  model_choice = gr.Dropdown(
235
+ choices=["LFM2-VL-1.6B", "LFM2-VL-450M", "olmOCR-7B-0725", "Nanonets-OCR-s", "Megalodon-OCR-Sync-0713"],
236
  label="Select Model", value="Nanonets-OCR-s"
237
  )
238
  prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter your query")