taprosoft
committed on
Commit
·
44f7c24
1
Parent(s):
7aa7cc7
fix: update methods
Browse files
- app.py +4 -9
- backends/__init__.py +0 -6
- requirements.txt +1 -1
app.py
CHANGED
@@ -11,11 +11,10 @@ from gradio_pdf import PDF
|
|
11 |
from backends import ( # convert_zerox,
|
12 |
SUPPORTED_METHODS,
|
13 |
SUPPORTED_METHODS_METADATA,
|
14 |
-
convert_gemini,
|
15 |
convert_gmft,
|
16 |
-
convert_img2table,
|
17 |
convert_pypdf,
|
18 |
convert_smoldocling,
|
|
|
19 |
)
|
20 |
from backends.settings import ENABLE_DEBUG_MODE
|
21 |
from utils import remove_images_from_markdown, trim_pages
|
@@ -53,10 +52,8 @@ def convert_document(path, method, start_page=0, enabled=True):
|
|
53 |
path,
|
54 |
embed_images=True,
|
55 |
)
|
56 |
-
elif method == "
|
57 |
-
text, debug_image_paths =
|
58 |
-
elif method == "Img2Table (table-only)":
|
59 |
-
text, debug_image_paths = convert_img2table(path, file_name)
|
60 |
elif method == "GMFT (table-only)":
|
61 |
text, debug_image_paths = convert_gmft(path, file_name)
|
62 |
elif method == "PyPDF":
|
@@ -183,9 +180,7 @@ with gr.Blocks(
|
|
183 |
with gr.Row():
|
184 |
methods = gr.Dropdown(
|
185 |
SUPPORTED_METHODS,
|
186 |
-
label=(
|
187 |
-
"Conversion methods " f"(select up-to {MAX_SELECTED_METHODS})"
|
188 |
-
),
|
189 |
value=SUPPORTED_METHODS[:2],
|
190 |
multiselect=True,
|
191 |
)
|
|
|
11 |
from backends import ( # convert_zerox,
|
12 |
SUPPORTED_METHODS,
|
13 |
SUPPORTED_METHODS_METADATA,
|
|
|
14 |
convert_gmft,
|
|
|
15 |
convert_pypdf,
|
16 |
convert_smoldocling,
|
17 |
+
convert_unstructured,
|
18 |
)
|
19 |
from backends.settings import ENABLE_DEBUG_MODE
|
20 |
from utils import remove_images_from_markdown, trim_pages
|
|
|
52 |
path,
|
53 |
embed_images=True,
|
54 |
)
|
55 |
+
elif method == "Unstructured":
|
56 |
+
text, debug_image_paths = convert_unstructured(path, file_name)
|
|
|
|
|
57 |
elif method == "GMFT (table-only)":
|
58 |
text, debug_image_paths = convert_gmft(path, file_name)
|
59 |
elif method == "PyPDF":
|
|
|
180 |
with gr.Row():
|
181 |
methods = gr.Dropdown(
|
182 |
SUPPORTED_METHODS,
|
183 |
+
label=("Conversion methods"),
|
|
|
|
|
184 |
value=SUPPORTED_METHODS[:2],
|
185 |
multiselect=True,
|
186 |
)
|
backends/__init__.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
# flake8: noqa
|
2 |
-
from .gemini import convert_gemini
|
3 |
from .gmft import convert_gmft
|
4 |
-
from .img2table import convert_img2table
|
5 |
from .pypdf import convert_pypdf
|
6 |
from .smoldocling import convert_smoldocling
|
7 |
from .unstructured import convert_unstructured
|
@@ -9,8 +7,6 @@ from .unstructured import convert_unstructured
|
|
9 |
__all__ = [
|
10 |
"convert_smoldocling",
|
11 |
"convert_unstructured",
|
12 |
-
"convert_gemini",
|
13 |
-
"convert_img2table",
|
14 |
"convert_gmft",
|
15 |
"convert_pypdf",
|
16 |
]
|
@@ -19,9 +15,7 @@ SUPPORTED_METHODS = [
|
|
19 |
"SmolDocling",
|
20 |
"PyMuPDF",
|
21 |
"PyPDF",
|
22 |
-
"Gemini (API)",
|
23 |
"Unstructured",
|
24 |
-
"Img2Table (table-only)",
|
25 |
"GMFT (table-only)",
|
26 |
]
|
27 |
SUPPORTED_METHODS_METADATA = {
|
|
|
1 |
# flake8: noqa
|
|
|
2 |
from .gmft import convert_gmft
|
|
|
3 |
from .pypdf import convert_pypdf
|
4 |
from .smoldocling import convert_smoldocling
|
5 |
from .unstructured import convert_unstructured
|
|
|
7 |
__all__ = [
|
8 |
"convert_smoldocling",
|
9 |
"convert_unstructured",
|
|
|
|
|
10 |
"convert_gmft",
|
11 |
"convert_pypdf",
|
12 |
]
|
|
|
15 |
"SmolDocling",
|
16 |
"PyMuPDF",
|
17 |
"PyPDF",
|
|
|
18 |
"Unstructured",
|
|
|
19 |
"GMFT (table-only)",
|
20 |
]
|
21 |
SUPPORTED_METHODS_METADATA = {
|
requirements.txt
CHANGED
@@ -10,4 +10,4 @@ transformers<5.0.0,>=4.45.2
|
|
10 |
pypdf
|
11 |
docling_core
|
12 |
opencv-contrib-python
|
13 |
-
|
|
|
10 |
pypdf
|
11 |
docling_core
|
12 |
opencv-contrib-python
|
13 |
+
flash_attn
|