Spaces:

prithivMLmods
/

Tiny-VLMs-Lab

Running on Zero

App Files Files Community

prithivMLmods committed 14 days ago

Commit

260ceda

verified ·

1 Parent(s): 79ad69c

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -0

app.py CHANGED Viewed

@@ -13,12 +13,18 @@ import gradio as gr
 import requests
 import torch
 from PIL import Image
 from transformers import (
     Qwen2_5_VLForConditionalGeneration,
     AutoProcessor,
     TextIteratorStreamer,
 )
 js_func = """
 function refresh() {
     const url = new URL(window.location);
@@ -83,6 +89,24 @@ model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
 ).to(device).eval()
 # --- Utility Functions ---
 def layoutjson2md(layout_data: List[Dict]) -> str:
     """Converts the structured JSON from Layout Analysis into formatted Markdown."""
@@ -133,6 +157,7 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
     elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
     elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
     elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
     else:
         yield "Invalid model selected.", "Invalid model selected.", None
         return
@@ -201,6 +226,7 @@ def create_gradio_interface():
                              "MonkeyOCR-Recognition",
                              "olmOCR-7B-0725",
                              "Nanonets-OCR-s",
                              "Megalodon-OCR-Sync-0713"
                             ],
                     label="Select Model", value="Nanonets-OCR-s"

 import requests
 import torch
 from PIL import Image
 from transformers import (
+    Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
+    AutoModelForImageTextToText,
     AutoProcessor,
     TextIteratorStreamer,
+    AutoModel,
+    AutoTokenizer,
 )
+# --- Activate Forced Dark Mode ---
 js_func = """
 function refresh() {
     const url = new URL(window.location);
     MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
 ).to(device).eval()
+#-----------------------------------------#
+# Load MonkeyOCR
+MODEL_ID_J = "echo840/MonkeyOCR-pro-1.2B"
+SUBFOLDER = "Recognition"
+processor_j = AutoProcessor.from_pretrained(
+    MODEL_ID_J,
+    trust_remote_code=True,
+    subfolder=SUBFOLDER
+)
+model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    MODEL_ID_J,
+    trust_remote_code=True,
+    subfolder=SUBFOLDER,
+    torch_dtype=torch.float16
+).to(device).eval()
+#-----------------------------------------#
 # --- Utility Functions ---
 def layoutjson2md(layout_data: List[Dict]) -> str:
     """Converts the structured JSON from Layout Analysis into formatted Markdown."""
     elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
     elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
     elif model_name == "olmOCR-7B-0725": processor, model = processor_i, model_i
+    elif model_name == "MonkeyOCR-pro-1.2B": processor, model = processor_j, model_j
     else:
         yield "Invalid model selected.", "Invalid model selected.", None
         return
                              "MonkeyOCR-Recognition",
                              "olmOCR-7B-0725",
                              "Nanonets-OCR-s",
+                             "MonkeyOCR-pro-1.2B",
                              "Megalodon-OCR-Sync-0713"
                             ],
                     label="Select Model", value="Nanonets-OCR-s"