Spaces:

prithivMLmods
/

Tiny-VLMs-Lab

Running on Zero

App Files Community

prithivMLmods committed on 14 days ago

Commit

6f2faf1

verified ·

1 Parent(s): df80f25

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -8

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ from PIL import Image
 from transformers import (
     Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
     AutoModelForImageTextToText,
     AutoProcessor,
     TextIteratorStreamer,
@@ -91,18 +92,16 @@ model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 #-----------------------------------------#
-# Load MonkeyOCR
-MODEL_ID_J = "echo840/MonkeyOCR-pro-1.2B"
-SUBFOLDER = "Recognition"
 processor_j = AutoProcessor.from_pretrained(
     MODEL_ID_J,
     trust_remote_code=True,
-    subfolder=SUBFOLDER
 )
-model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_J,
     trust_remote_code=True,
-    subfolder=SUBFOLDER,
     torch_dtype=torch.float16
 ).to(device).eval()
 #-----------------------------------------#
@@ -157,7 +156,7 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
     elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
     elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
     elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
-    elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_j, model_j
     else:
         yield "Invalid model selected.", "Invalid model selected.", None
         return
@@ -226,7 +225,7 @@ def create_gradio_interface():
                              "MonkeyOCR-Recognition",
                              "olmOCR-7B-0725",
                              "Nanonets-OCR-s",
-                             "MonkeyOCR-pro-1.2B",
                              "Megalodon-OCR-Sync-0713"
                             ],
                     label="Select Model", value="Nanonets-OCR-s"

 from transformers import (
     Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
+    AutoModelForVision2Seq,
     AutoModelForImageTextToText,
     AutoProcessor,
     TextIteratorStreamer,
 #-----------------------------------------#
+# Load NuExtract-2.0-4B
+MODEL_ID_J = "numind/NuExtract-2.0-4B"
 processor_j = AutoProcessor.from_pretrained(
     MODEL_ID_J,
     trust_remote_code=True,
 )
+model_j = AutoModelForVision2Seq.from_pretrained(
     MODEL_ID_J,
     trust_remote_code=True,
+    attn_implementation="flash_attention_2",
     torch_dtype=torch.float16
 ).to(device).eval()
 #-----------------------------------------#
     elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
     elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
     elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
+    elif model_name == "NuExtract-2.0-4B": processor, model = processor_j, model_j
     else:
         yield "Invalid model selected.", "Invalid model selected.", None
         return
                              "MonkeyOCR-Recognition",
                              "olmOCR-7B-0725",
                              "Nanonets-OCR-s",
+                             "NuExtract-2.0-4B",
                              "Megalodon-OCR-Sync-0713"
                             ],
                     label="Select Model", value="Nanonets-OCR-s"