taprosoft committed on
Commit
7e604f0
·
1 Parent(s): 703dc2e

docs: update README

Browse files
Files changed (3) hide show
  1. README.md +1 -0
  2. app.py +1 -1
  3. utils.py +1 -0
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: blue
5
  colorTo: green
6
  sdk: docker
7
  pinned: false
 
8
  short_description: Convert PDFs to Markdown with open-source parsers
9
  ---
10
 
 
5
  colorTo: green
6
  sdk: docker
7
  pinned: false
8
+ header: mini
9
  short_description: Convert PDFs to Markdown with open-source parsers
10
  ---
11
 
app.py CHANGED
@@ -79,7 +79,7 @@ latex_delimiters = [
79
  # startup test (also for loading models the first time)
80
  start_startup = time.time()
81
  WARMUP_PDF_PATH = "table.pdf"
82
- SUPPORTED_METHODS = ["Docling", "Marker", "Unstructured", "MinerU", "PyMuPDF"]
83
 
84
  print("Warm-up sequence")
85
  for method in SUPPORTED_METHODS:
 
79
  # startup test (also for loading models the first time)
80
  start_startup = time.time()
81
  WARMUP_PDF_PATH = "table.pdf"
82
+ SUPPORTED_METHODS = ["PyMuPDF", "Docling", "Marker", "MinerU", "Unstructured"]
83
 
84
  print("Warm-up sequence")
85
  for method in SUPPORTED_METHODS:
utils.py CHANGED
@@ -57,6 +57,7 @@ def prepare_env_mineru():
57
 
58
  # download nltk data
59
  nltk.download("punkt_tab")
 
60
 
61
  # download models
62
  os.system(
 
57
 
58
  # download nltk data
59
  nltk.download("punkt_tab")
60
+ nltk.download("averaged_perceptron_tagger_eng")
61
 
62
  # download models
63
  os.system(