Spaces:

prithivMLmods
/

Tiny-VLMs-Lab

Running on Zero

App Files Community

prithivMLmods committed 13 days ago

Commit

18c8c76

verified ·

1 Parent(s): 4d0a926

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -4

app.py CHANGED Viewed

@@ -50,7 +50,7 @@ layout_prompt = """Please output the layout information from the image, includin
 ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
 # --- Model Loading ---
-MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-062825"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
@@ -77,6 +77,12 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
 ).to(device).eval()
 # --- Utility Functions ---
 def layoutjson2md(layout_data: List[Dict]) -> str:
     """Converts the structured JSON from Layout Analysis into formatted Markdown."""
@@ -122,10 +128,11 @@ def process_document_stream(model_name: str, task_choice: str, image: Image.Imag
     text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
     # 2. Select model and processor
-    if model_name == "Camel-Doc-OCR-062825": processor, model = processor_m, model_m
     elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
     elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
     elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
     else:
         yield "Invalid model selected.", "Invalid model selected.", None
         return
@@ -190,10 +197,11 @@ def create_gradio_interface():
             # Left Column (Inputs)
             with gr.Column(scale=1):
                 model_choice = gr.Dropdown(
-                    choices=["Camel-Doc-OCR-062825",
                              "MonkeyOCR-Recognition",
                              "Nanonets-OCR-s",
-                             "Megalodon-OCR-Sync-0713"],
                     label="Select Model", value="Nanonets-OCR-s"
                 )
                 task_choice = gr.Dropdown(

 ocr_prompt = "Perform precise OCR on the image. Extract all text content, maintaining the original structure, paragraphs, and tables as formatted markdown."
 # --- Model Loading ---
+MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-080125"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M, trust_remote_code=True, torch_dtype=torch.float16
     MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
 ).to(device).eval()
+MODEL_ID_I = "ChatDOC/OCRFlux-3B"
+processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
+model_i = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.float16
+).to(device).eval()
 # --- Utility Functions ---
 def layoutjson2md(layout_data: List[Dict]) -> str:
     """Converts the structured JSON from Layout Analysis into formatted Markdown."""
     text_prompt = ocr_prompt if task_choice == "Content Extraction" else layout_prompt
     # 2. Select model and processor
+    if model_name == "Camel-Doc-OCR-080125": processor, model = processor_m, model_m
     elif model_name == "Megalodon-OCR-Sync-0713": processor, model = processor_t, model_t
     elif model_name == "Nanonets-OCR-s": processor, model = processor_c, model_c
     elif model_name == "MonkeyOCR-Recognition": processor, model = processor_g, model_g
+    elif model_name == "OCRFlux-3B": processor, model = processor_i, model_i
     else:
         yield "Invalid model selected.", "Invalid model selected.", None
         return
             # Left Column (Inputs)
             with gr.Column(scale=1):
                 model_choice = gr.Dropdown(
+                    choices=["Camel-Doc-OCR-080125",
                              "MonkeyOCR-Recognition",
                              "Nanonets-OCR-s",
+                             "Megalodon-OCR-Sync-0713",
+                             "OCRFlux-3B"],
                     label="Select Model", value="Nanonets-OCR-s"
                 )
                 task_choice = gr.Dropdown(