taprosoft
committed on
Commit
·
7e604f0
1
Parent(s):
703dc2e
docs: update README
Browse files
README.md
CHANGED
@@ -5,6 +5,7 @@ colorFrom: blue
|
|
5 |
colorTo: green
|
6 |
sdk: docker
|
7 |
pinned: false
|
|
|
8 |
short_description: Convert PDFs to Markdown with open-source parsers
|
9 |
---
|
10 |
|
|
|
5 |
colorTo: green
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
+
header: mini
|
9 |
short_description: Convert PDFs to Markdown with open-source parsers
|
10 |
---
|
11 |
|
app.py
CHANGED
@@ -79,7 +79,7 @@ latex_delimiters = [
|
|
79 |
# startup test (also for loading models the first time)
|
80 |
start_startup = time.time()
|
81 |
WARMUP_PDF_PATH = "table.pdf"
|
82 |
-
SUPPORTED_METHODS = ["
|
83 |
|
84 |
print("Warm-up sequence")
|
85 |
for method in SUPPORTED_METHODS:
|
|
|
79 |
# startup test (also for loading models the first time)
|
80 |
start_startup = time.time()
|
81 |
WARMUP_PDF_PATH = "table.pdf"
|
82 |
+
SUPPORTED_METHODS = ["PyMuPDF", "Docling", "Marker", "MinerU", "Unstructured"]
|
83 |
|
84 |
print("Warm-up sequence")
|
85 |
for method in SUPPORTED_METHODS:
|
utils.py
CHANGED
@@ -57,6 +57,7 @@ def prepare_env_mineru():
|
|
57 |
|
58 |
# download nltk data
|
59 |
nltk.download("punkt_tab")
|
|
|
60 |
|
61 |
# download models
|
62 |
os.system(
|
|
|
57 |
|
58 |
# download nltk data
|
59 |
nltk.download("punkt_tab")
|
60 |
+
nltk.download("averaged_perceptron_tagger_eng")
|
61 |
|
62 |
# download models
|
63 |
os.system(
|