Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,6 +6,7 @@ import traceback
 from io import BytesIO
 from typing import Any, Dict, List, Optional, Tuple
 import re
+import warnings
 
 import fitz  # PyMuPDF
 import gradio as gr
@@ -16,6 +17,13 @@ from transformers import AutoModelForCausalLM, AutoProcessor, VisionEncoderDecod
 from huggingface_hub import snapshot_download
 from qwen_vl_utils import process_vision_info
 
+# Suppress the FutureWarning for cleaner output (optional)
+warnings.filterwarnings(
+    "ignore",
+    category=FutureWarning,
+    message="Both `num_logits_to_keep` and `logits_to_keep` are set"
+)
+
 # JavaScript for theme refresh
 js_func = """
 function refresh() {
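The `message` argument of `warnings.filterwarnings` is treated as a regular expression matched against the start of the warning text, so only this specific transformers FutureWarning is silenced and other warnings still surface. A minimal, self-contained sketch (not part of app.py) illustrating that behaviour:

```python
# Sketch only: the filter added above silences just the targeted FutureWarning
# and leaves unrelated warnings visible.
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # record everything by default
    warnings.filterwarnings(
        "ignore",
        category=FutureWarning,
        message="Both `num_logits_to_keep` and `logits_to_keep` are set",
    )
    warnings.warn("Both `num_logits_to_keep` and `logits_to_keep` are set", FutureWarning)
    warnings.warn("some unrelated FutureWarning", FutureWarning)

print(len(caught))  # 1 -- only the unrelated warning was recorded
```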
@@ -48,7 +56,7 @@ prompt = """Please output the layout information from the PDF image, including e
 5. Final Output: Single JSON object
 """
 
-# Model loading functions
+# Model loading functions
 def load_model(model_name):
     if model_name == "dots.ocr":
         model_id = "rednote-hilab/dots.ocr"
@@ -78,7 +86,7 @@ def load_model(model_name):
         raise ValueError(f"Unknown model: {model_name}")
     return model, processor
 
-# Inference functions
+# Inference functions
 def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
     messages = [
         {
@@ -103,7 +111,7 @@ def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
     generated_ids = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
-        do_sample=False  #
+        do_sample=False  # Temperature removed previously to fix another warning
     )
     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
     output_text = processor.batch_decode(
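`model.generate` warns when sampling parameters such as `temperature` or `top_p` are passed alongside `do_sample=False`, because greedy decoding ignores them; the commit's comment notes the temperature was dropped earlier for that reason. A hedged sketch of the same greedy-decoding pattern, with `model`, `processor`, and `inputs` assumed to be the already-prepared objects from `inference_dots_ocr`:

```python
# Sketch: greedy decoding without sampling knobs, mirroring inference_dots_ocr.
# `model`, `processor`, and `inputs` are assumed to exist as in the app.
def decode_greedy(model, processor, inputs, max_new_tokens=1024):
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # greedy search; temperature/top_p would be ignored
    )
    # Drop the prompt tokens so only newly generated text is decoded.
    trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(
        trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
```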
@@ -291,7 +299,7 @@ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = '
             markdown_lines.append(f"**Table:** {text}\n")
         elif category == 'Formula':
             if text.strip().startswith('$') or '\\' in text:
-                markdown_lines.append(f"$$
+                markdown_lines.append(f"$$ \n{text}\n $$\n")  # Fixed formatting, removed extra spaces
             else:
                 markdown_lines.append(f"**Formula:** {text}\n")
         elif category == 'Caption':
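The rewritten line wraps a recognised LaTeX formula in a `$$ ... $$` block with the delimiters on their own lines, so downstream Markdown renderers can treat it as display math. Whether it actually renders depends on how the Markdown component is configured; the `latex_delimiters` argument below is an illustrative assumption, not part of this commit:

```python
# Sketch: rendering the $$-wrapped formula string produced by layoutjson2md.
# latex_delimiters is an illustrative assumption, not something this commit adds.
import gradio as gr

formula = r"E = mc^2"
md_block = f"$$ \n{formula}\n $$\n"  # same shape as the updated append

with gr.Blocks() as demo:
    gr.Markdown(
        value=md_block,
        latex_delimiters=[{"left": "$$", "right": "$$", "display": True}],
    )

# demo.launch()  # uncomment to preview locally
```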
@@ -495,7 +503,6 @@ def create_gradio_interface():
                 max_pixels = gr.Number(value=MAX_PIXELS, label="Max Pixels")
                 process_btn = gr.Button("🔥 Process Document", variant="primary", elem_classes=["process-button"], size="lg")
                 clear_btn = gr.Button("Clear Document", variant="secondary")
-
             with gr.Column(scale=2):
                 with gr.Tabs():
                     with gr.Tab("✦︎ Processed Image"):
@@ -504,14 +511,12 @@ def create_gradio_interface():
                         markdown_output = gr.Markdown(value="Click 'Process Document' to see extracted content...", height=500)
                     with gr.Tab("⏲ Layout JSON"):
                         json_output = gr.JSON(label="Layout Analysis Results", value=None)
-
         with gr.Row():
             examples = gr.Examples(
                 examples=["examples/sample_image1.png", "examples/sample_image2.png", "examples/sample_pdf.pdf"],
                 inputs=file_input,
                 label="Example Documents"
-
-
+            )
     def handle_file_upload(file_path):
         image, page_info = load_file_for_preview(file_path)
         return image, page_info
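The added closing parenthesis completes the `gr.Examples(...)` call. A minimal sketch of the same pattern in isolation; the example paths and `label` come from the diff, while the components and handler wiring are simplified stand-ins:

```python
# Sketch: standalone layout showing the gr.Examples call closed by the added ')'.
# Only the Examples arguments follow the diff; the rest is simplified.
import gradio as gr

def handle_file_upload(file_path):
    return file_path  # stand-in for load_file_for_preview(file_path)

with gr.Blocks() as demo:
    file_input = gr.File(label="Upload Image or PDF")
    preview = gr.Textbox(label="Preview path")
    with gr.Row():
        gr.Examples(
            examples=["examples/sample_image1.png",
                      "examples/sample_image2.png",
                      "examples/sample_pdf.pdf"],
            inputs=file_input,
            label="Example Documents",
        )
    file_input.change(handle_file_upload, inputs=file_input, outputs=preview)
```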
|