taprosoft committed on
Commit
7e20950
·
1 Parent(s): 7e604f0

fix: minor update backend

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. backends/mineru.py +2 -2
  3. backends/unstructured.py +0 -1
.gitignore CHANGED
@@ -465,3 +465,4 @@ S.gpg-agent*
465
  .vscode/settings.json
466
  examples/example1/assets
467
  storage/*
 
 
465
  .vscode/settings.json
466
  examples/example1/assets
467
  storage/*
468
+ debug_data/*
backends/mineru.py CHANGED
@@ -41,7 +41,7 @@ def do_process_mineru(input_path, output_dir):
41
 
42
  pdf_data = read_fn(input_path)
43
  parse_method = "auto"
44
- local_image_dir, local_md_dir = prepare_env(output_dir, file_name, parse_method)
45
  do_parse(
46
  output_dir,
47
  file_name,
@@ -51,7 +51,7 @@ def do_process_mineru(input_path, output_dir):
51
  debug_able=False,
52
  f_dump_orig_pdf=False,
53
  f_draw_layout_bbox=ENABLE_DEBUG_MODE,
54
- f_draw_char_bbox=ENABLE_DEBUG_MODE,
55
  formula_enable=False,
56
  table_enable=True,
57
  )
 
41
 
42
  pdf_data = read_fn(input_path)
43
  parse_method = "auto"
44
+ _, local_md_dir = prepare_env(output_dir, file_name, parse_method)
45
  do_parse(
46
  output_dir,
47
  file_name,
 
51
  debug_able=False,
52
  f_dump_orig_pdf=False,
53
  f_draw_layout_bbox=ENABLE_DEBUG_MODE,
54
+ f_draw_char_bbox=False,
55
  formula_enable=False,
56
  table_enable=True,
57
  )
backends/unstructured.py CHANGED
@@ -58,7 +58,6 @@ def convert_unstructured(path: str, file_name: str):
58
  # mandatory to use ``hi_res`` strategy
59
  strategy="hi_res",
60
  infer_table_structure=True,
61
- # extract_images_in_pdf=True,
62
  extract_image_block_types=["Image", "Table"],
63
  extract_image_block_to_payload=True,
64
  analysis=ENABLE_DEBUG_MODE,
 
58
  # mandatory to use ``hi_res`` strategy
59
  strategy="hi_res",
60
  infer_table_structure=True,
 
61
  extract_image_block_types=["Image", "Table"],
62
  extract_image_block_to_payload=True,
63
  analysis=ENABLE_DEBUG_MODE,