prithivMLmods committed on
Commit
f8f5f34
·
verified ·
1 Parent(s): e9d3211

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -6,6 +6,7 @@ import traceback
6
  from io import BytesIO
7
  from typing import Any, Dict, List, Optional, Tuple
8
  import re
 
9
 
10
  import fitz # PyMuPDF
11
  import gradio as gr
@@ -16,6 +17,13 @@ from transformers import AutoModelForCausalLM, AutoProcessor, VisionEncoderDecod
16
  from huggingface_hub import snapshot_download
17
  from qwen_vl_utils import process_vision_info
18
 
 
 
 
 
 
 
 
19
  # JavaScript for theme refresh
20
  js_func = """
21
  function refresh() {
@@ -48,7 +56,7 @@ prompt = """Please output the layout information from the PDF image, including e
48
  5. Final Output: Single JSON object
49
  """
50
 
51
- # Model loading functions (from model.py)
52
  def load_model(model_name):
53
  if model_name == "dots.ocr":
54
  model_id = "rednote-hilab/dots.ocr"
@@ -78,7 +86,7 @@ def load_model(model_name):
78
  raise ValueError(f"Unknown model: {model_name}")
79
  return model, processor
80
 
81
- # Inference functions (from model.py)
82
  def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
83
  messages = [
84
  {
@@ -103,7 +111,7 @@ def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
103
  generated_ids = model.generate(
104
  **inputs,
105
  max_new_tokens=max_new_tokens,
106
- do_sample=False # Removed temperature=0.1 to fix the warning
107
  )
108
  generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
109
  output_text = processor.batch_decode(
@@ -291,7 +299,7 @@ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = '
291
  markdown_lines.append(f"**Table:** {text}\n")
292
  elif category == 'Formula':
293
  if text.strip().startswith('$') or '\\' in text:
294
- markdown_lines.append(f"$$ \n{text}\n $$\n")
295
  else:
296
  markdown_lines.append(f"**Formula:** {text}\n")
297
  elif category == 'Caption':
@@ -495,7 +503,6 @@ def create_gradio_interface():
495
  max_pixels = gr.Number(value=MAX_PIXELS, label="Max Pixels")
496
  process_btn = gr.Button("🔥 Process Document", variant="primary", elem_classes=["process-button"], size="lg")
497
  clear_btn = gr.Button("Clear Document", variant="secondary")
498
-
499
  with gr.Column(scale=2):
500
  with gr.Tabs():
501
  with gr.Tab("✦︎ Processed Image"):
@@ -504,14 +511,12 @@ def create_gradio_interface():
504
  markdown_output = gr.Markdown(value="Click 'Process Document' to see extracted content...", height=500)
505
  with gr.Tab("⏲ Layout JSON"):
506
  json_output = gr.JSON(label="Layout Analysis Results", value=None)
507
-
508
  with gr.Row():
509
  examples = gr.Examples(
510
  examples=["examples/sample_image1.png", "examples/sample_image2.png", "examples/sample_pdf.pdf"],
511
  inputs=file_input,
512
  label="Example Documents"
513
- )
514
-
515
  def handle_file_upload(file_path):
516
  image, page_info = load_file_for_preview(file_path)
517
  return image, page_info
 
6
  from io import BytesIO
7
  from typing import Any, Dict, List, Optional, Tuple
8
  import re
9
+ import warnings
10
 
11
  import fitz # PyMuPDF
12
  import gradio as gr
 
17
  from huggingface_hub import snapshot_download
18
  from qwen_vl_utils import process_vision_info
19
 
20
+ # Suppress the FutureWarning for cleaner output (optional)
21
+ warnings.filterwarnings(
22
+ "ignore",
23
+ category=FutureWarning,
24
+ message="Both `num_logits_to_keep` and `logits_to_keep` are set"
25
+ )
26
+
27
  # JavaScript for theme refresh
28
  js_func = """
29
  function refresh() {
 
56
  5. Final Output: Single JSON object
57
  """
58
 
59
+ # Model loading functions
60
  def load_model(model_name):
61
  if model_name == "dots.ocr":
62
  model_id = "rednote-hilab/dots.ocr"
 
86
  raise ValueError(f"Unknown model: {model_name}")
87
  return model, processor
88
 
89
+ # Inference functions
90
  def inference_dots_ocr(model, processor, image, prompt, max_new_tokens):
91
  messages = [
92
  {
 
111
  generated_ids = model.generate(
112
  **inputs,
113
  max_new_tokens=max_new_tokens,
114
+ do_sample=False # Temperature removed previously to fix another warning
115
  )
116
  generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
117
  output_text = processor.batch_decode(
 
299
  markdown_lines.append(f"**Table:** {text}\n")
300
  elif category == 'Formula':
301
  if text.strip().startswith('$') or '\\' in text:
302
+ markdown_lines.append(f"$$ \n{text}\n $$\n") # Fixed formatting, removed extra spaces
303
  else:
304
  markdown_lines.append(f"**Formula:** {text}\n")
305
  elif category == 'Caption':
 
503
  max_pixels = gr.Number(value=MAX_PIXELS, label="Max Pixels")
504
  process_btn = gr.Button("🔥 Process Document", variant="primary", elem_classes=["process-button"], size="lg")
505
  clear_btn = gr.Button("Clear Document", variant="secondary")
 
506
  with gr.Column(scale=2):
507
  with gr.Tabs():
508
  with gr.Tab("✦︎ Processed Image"):
 
511
  markdown_output = gr.Markdown(value="Click 'Process Document' to see extracted content...", height=500)
512
  with gr.Tab("⏲ Layout JSON"):
513
  json_output = gr.JSON(label="Layout Analysis Results", value=None)
 
514
  with gr.Row():
515
  examples = gr.Examples(
516
  examples=["examples/sample_image1.png", "examples/sample_image2.png", "examples/sample_pdf.pdf"],
517
  inputs=file_input,
518
  label="Example Documents"
519
+ )
 
520
  def handle_file_upload(file_path):
521
  image, page_info = load_file_for_preview(file_path)
522
  return image, page_info