Debug: remove the last remaining Unstructured extractor references
Browse files
country_by_country/table_extraction/__init__.py
CHANGED
@@ -27,8 +27,6 @@ import sys
|
|
27 |
from .camelot_extractor import Camelot
|
28 |
from .from_csv import FromCSV
|
29 |
from .llama_parse_extractor import LlamaParseExtractor
|
30 |
-
from .unstructured import Unstructured
|
31 |
-
from .unstructured_api import UnstructuredAPI
|
32 |
|
33 |
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
|
34 |
|
@@ -42,10 +40,6 @@ def from_config(config: dict) -> Camelot:
|
|
42 |
return Camelot(**extractor_params)
|
43 |
elif extractor_type == "FromCSV":
|
44 |
return FromCSV(**extractor_params)
|
45 |
-
elif extractor_type == "Unstructured":
|
46 |
-
return Unstructured(**extractor_params)
|
47 |
-
elif extractor_type == "UnstructuredAPI":
|
48 |
-
return UnstructuredAPI(**extractor_params)
|
49 |
elif extractor_type == "LlamaParse":
|
50 |
return LlamaParseExtractor(**extractor_params)
|
51 |
elif extractor_type == "ExtractTableAPI":
|
|
|
27 |
from .camelot_extractor import Camelot
|
28 |
from .from_csv import FromCSV
|
29 |
from .llama_parse_extractor import LlamaParseExtractor
|
|
|
|
|
30 |
|
31 |
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
|
32 |
|
|
|
40 |
return Camelot(**extractor_params)
|
41 |
elif extractor_type == "FromCSV":
|
42 |
return FromCSV(**extractor_params)
|
|
|
|
|
|
|
|
|
43 |
elif extractor_type == "LlamaParse":
|
44 |
return LlamaParseExtractor(**extractor_params)
|
45 |
elif extractor_type == "ExtractTableAPI":
|
extract_config.yaml
CHANGED
@@ -5,7 +5,3 @@ pagefilter:
|
|
5 |
|
6 |
table_extraction:
|
7 |
- type: LlamaParse
|
8 |
-
- type: Unstructured
|
9 |
-
params:
|
10 |
-
hi_res_model_name: "yolox"
|
11 |
-
pdf_image_dpi: 300
|
|
|
5 |
|
6 |
table_extraction:
|
7 |
- type: LlamaParse
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -5,12 +5,9 @@ camelot-py
|
|
5 |
opencv-python-headless
|
6 |
ghostscript
|
7 |
pypdf
|
8 |
-
unstructured
|
9 |
pdf2image
|
10 |
-
unstructured-inference
|
11 |
pytesseract
|
12 |
pikepdf
|
13 |
-
unstructured-pytesseract
|
14 |
joblib
|
15 |
llama-parse
|
16 |
python-dotenv
|
|
|
5 |
opencv-python-headless
|
6 |
ghostscript
|
7 |
pypdf
|
|
|
8 |
pdf2image
|
|
|
9 |
pytesseract
|
10 |
pikepdf
|
|
|
11 |
joblib
|
12 |
llama-parse
|
13 |
python-dotenv
|