Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,6 +6,7 @@ import traceback
 from io import BytesIO
 from typing import Any, Dict, List, Optional, Tuple
 import re
+import warnings
 
 import fitz  # PyMuPDF
 import gradio as gr
@@ -16,6 +17,13 @@ from transformers import AutoModelForCausalLM, AutoProcessor, VisionEncoderDecod
 from huggingface_hub import snapshot_download
 from qwen_vl_utils import process_vision_info
 
+# Suppress the FutureWarning for cleaner output (optional)
+warnings.filterwarnings(
+    "ignore",
+    category=FutureWarning,
+    message="Both `num_logits_to_keep` and `logits_to_keep` are set"
+)
+
 # JavaScript for theme refresh
 js_func = """
 function refresh() {
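The `message` argument of `warnings.filterwarnings` is treated as a regular expression matched against the start of the warning text, so only this specific transformers FutureWarning is silenced and other warnings still surface. A minimal, self-contained sketch (not part of app.py) illustrating that behaviour:

```python
# Sketch only: the filter added above silences just the targeted FutureWarning
# and leaves unrelated warnings visible.
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # record everything by default
    warnings.filterwarnings(
        "ignore",
        category=FutureWarning,
        message="Both `num_logits_to_keep` and `logits_to_keep` are set",
    )
    warnings.warn("Both `num_logits_to_keep` and `logits_to_keep` are set", FutureWarning)
    warnings.warn("some unrelated FutureWarning", FutureWarning)

print(len(caught))  # 1 -- only the unrelated warning was recorded
```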
@@ -48,7 +56,7 @@ prompt = """Please output the layout information from the PDF image, including e
 5. Final Output: Single JSON object
 """
 
-# Model loading functions
+# Model loading functions
 def load_model(model_name):
     if model_name == "dots.ocr":
         model_id = "rednote-hilab/dots.ocr"
@@ -78,7 +86,7 @@ def load_model(model_name):
         raise ValueError(f"Unknown model: {model_name}")
     return model, processor
 
-# Inference functions
+# Inference functions
 def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
     messages = [
         {
@@ -103,7 +111,7 @@ def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
     generated_ids = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
-        do_sample=False  #
+        do_sample=False  # Temperature removed previously to fix another warning
     )
     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
     output_text = processor.batch_decode(
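`model.generate` warns when sampling parameters such as `temperature` or `top_p` are passed alongside `do_sample=False`, because greedy decoding ignores them; the commit's comment notes the temperature was dropped earlier for that reason. A hedged sketch of the same greedy-decoding pattern, with `model`, `processor`, and `inputs` assumed to be the already-prepared objects from `inference_dots_ocr`:

```python
# Sketch: greedy decoding without sampling knobs, mirroring inference_dots_ocr.
# `model`, `processor`, and `inputs` are assumed to exist as in the app.
def decode_greedy(model, processor, inputs, max_new_tokens=1024):
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # greedy search; temperature/top_p would be ignored
    )
    # Drop the prompt tokens so only newly generated text is decoded.
    trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(
        trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
```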
@@ -291,7 +299,7 @@ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = '
             markdown_lines.append(f"**Table:** {text}\n")
         elif category == 'Formula':
             if text.strip().startswith('$') or '\\' in text:
-                markdown_lines.append(f"$$
+                markdown_lines.append(f"$$ \n{text}\n $$\n")  # Fixed formatting, removed extra spaces
             else:
                 markdown_lines.append(f"**Formula:** {text}\n")
         elif category == 'Caption':
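The rewritten line wraps a recognised LaTeX formula in a `$$ ... $$` block with the delimiters on their own lines, so downstream Markdown renderers can treat it as display math. Whether it actually renders depends on how the Markdown component is configured; the `latex_delimiters` argument below is an illustrative assumption, not part of this commit:

```python
# Sketch: rendering the $$-wrapped formula string produced by layoutjson2md.
# latex_delimiters is an illustrative assumption, not something this commit adds.
import gradio as gr

formula = r"E = mc^2"
md_block = f"$$ \n{formula}\n $$\n"  # same shape as the updated append

with gr.Blocks() as demo:
    gr.Markdown(
        value=md_block,
        latex_delimiters=[{"left": "$$", "right": "$$", "display": True}],
    )

# demo.launch()  # uncomment to preview locally
```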
@@ -495,7 +503,6 @@ def create_gradio_interface():
                 max_pixels = gr.Number(value=MAX_PIXELS, label="Max Pixels")
                 process_btn = gr.Button("🔥 Process Document", variant="primary", elem_classes=["process-button"], size="lg")
                 clear_btn = gr.Button("Clear Document", variant="secondary")
-
             with gr.Column(scale=2):
                 with gr.Tabs():
                     with gr.Tab("✦︎ Processed Image"):
@@ -504,14 +511,12 @@ def create_gradio_interface():
                         markdown_output = gr.Markdown(value="Click 'Process Document' to see extracted content...", height=500)
                     with gr.Tab("⏲ Layout JSON"):
                         json_output = gr.JSON(label="Layout Analysis Results", value=None)
-
         with gr.Row():
             examples = gr.Examples(
                 examples=["examples/sample_image1.png", "examples/sample_image2.png", "examples/sample_pdf.pdf"],
                 inputs=file_input,
                 label="Example Documents"
-
-
+            )
     def handle_file_upload(file_path):
         image, page_info = load_file_for_preview(file_path)
         return image, page_info
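The added closing parenthesis completes the `gr.Examples(...)` call. A minimal sketch of the same pattern in isolation; the example paths and `label` come from the diff, while the components and handler wiring are simplified stand-ins:

```python
# Sketch: standalone layout showing the gr.Examples call closed by the added ')'.
# Only the Examples arguments follow the diff; the rest is simplified.
import gradio as gr

def handle_file_upload(file_path):
    return file_path  # stand-in for load_file_for_preview(file_path)

with gr.Blocks() as demo:
    file_input = gr.File(label="Upload Image or PDF")
    preview = gr.Textbox(label="Preview path")
    with gr.Row():
        gr.Examples(
            examples=["examples/sample_image1.png",
                      "examples/sample_image2.png",
                      "examples/sample_pdf.pdf"],
            inputs=file_input,
            label="Example Documents",
        )
    file_input.change(handle_file_upload, inputs=file_input, outputs=preview)
```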
|