taprosoft
committed on
Commit
·
7e20950
1
Parent(s):
7e604f0
fix: minor update backend
Browse files
- .gitignore +1 -0
- backends/mineru.py +2 -2
- backends/unstructured.py +0 -1
.gitignore
CHANGED
|
@@ -465,3 +465,4 @@ S.gpg-agent*
|
|
| 465 |
.vscode/settings.json
|
| 466 |
examples/example1/assets
|
| 467 |
storage/*
|
|
|
|
|
|
| 465 |
.vscode/settings.json
|
| 466 |
examples/example1/assets
|
| 467 |
storage/*
|
| 468 |
+
debug_data/*
|
backends/mineru.py
CHANGED
|
@@ -41,7 +41,7 @@ def do_process_mineru(input_path, output_dir):
|
|
| 41 |
|
| 42 |
pdf_data = read_fn(input_path)
|
| 43 |
parse_method = "auto"
|
| 44 |
-
|
| 45 |
do_parse(
|
| 46 |
output_dir,
|
| 47 |
file_name,
|
|
@@ -51,7 +51,7 @@ def do_process_mineru(input_path, output_dir):
|
|
| 51 |
debug_able=False,
|
| 52 |
f_dump_orig_pdf=False,
|
| 53 |
f_draw_layout_bbox=ENABLE_DEBUG_MODE,
|
| 54 |
-
f_draw_char_bbox=
|
| 55 |
formula_enable=False,
|
| 56 |
table_enable=True,
|
| 57 |
)
|
|
|
|
| 41 |
|
| 42 |
pdf_data = read_fn(input_path)
|
| 43 |
parse_method = "auto"
|
| 44 |
+
_, local_md_dir = prepare_env(output_dir, file_name, parse_method)
|
| 45 |
do_parse(
|
| 46 |
output_dir,
|
| 47 |
file_name,
|
|
|
|
| 51 |
debug_able=False,
|
| 52 |
f_dump_orig_pdf=False,
|
| 53 |
f_draw_layout_bbox=ENABLE_DEBUG_MODE,
|
| 54 |
+
f_draw_char_bbox=False,
|
| 55 |
formula_enable=False,
|
| 56 |
table_enable=True,
|
| 57 |
)
|
backends/unstructured.py
CHANGED
|
@@ -58,7 +58,6 @@ def convert_unstructured(path: str, file_name: str):
|
|
| 58 |
# mandatory to use ``hi_res`` strategy
|
| 59 |
strategy="hi_res",
|
| 60 |
infer_table_structure=True,
|
| 61 |
-
# extract_images_in_pdf=True,
|
| 62 |
extract_image_block_types=["Image", "Table"],
|
| 63 |
extract_image_block_to_payload=True,
|
| 64 |
analysis=ENABLE_DEBUG_MODE,
|
|
|
|
| 58 |
# mandatory to use ``hi_res`` strategy
|
| 59 |
strategy="hi_res",
|
| 60 |
infer_table_structure=True,
|
|
|
|
| 61 |
extract_image_block_types=["Image", "Table"],
|
| 62 |
extract_image_block_to_payload=True,
|
| 63 |
analysis=ENABLE_DEBUG_MODE,
|