ADucatez committed on
Commit
ea04da6
·
1 Parent(s): 2e8bfe7

Debug: deletion of last unstructured

Browse files
country_by_country/table_extraction/__init__.py CHANGED
@@ -27,8 +27,6 @@ import sys
27
  from .camelot_extractor import Camelot
28
  from .from_csv import FromCSV
29
  from .llama_parse_extractor import LlamaParseExtractor
30
- from .unstructured import Unstructured
31
- from .unstructured_api import UnstructuredAPI
32
 
33
  logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
34
 
@@ -42,10 +40,6 @@ def from_config(config: dict) -> Camelot:
42
  return Camelot(**extractor_params)
43
  elif extractor_type == "FromCSV":
44
  return FromCSV(**extractor_params)
45
- elif extractor_type == "Unstructured":
46
- return Unstructured(**extractor_params)
47
- elif extractor_type == "UnstructuredAPI":
48
- return UnstructuredAPI(**extractor_params)
49
  elif extractor_type == "LlamaParse":
50
  return LlamaParseExtractor(**extractor_params)
51
  elif extractor_type == "ExtractTableAPI":
 
27
  from .camelot_extractor import Camelot
28
  from .from_csv import FromCSV
29
  from .llama_parse_extractor import LlamaParseExtractor
 
 
30
 
31
  logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
32
 
 
40
  return Camelot(**extractor_params)
41
  elif extractor_type == "FromCSV":
42
  return FromCSV(**extractor_params)
 
 
 
 
43
  elif extractor_type == "LlamaParse":
44
  return LlamaParseExtractor(**extractor_params)
45
  elif extractor_type == "ExtractTableAPI":
extract_config.yaml CHANGED
@@ -5,7 +5,3 @@ pagefilter:
5
 
6
  table_extraction:
7
  - type: LlamaParse
8
- - type: Unstructured
9
- params:
10
- hi_res_model_name: "yolox"
11
- pdf_image_dpi: 300
 
5
 
6
  table_extraction:
7
  - type: LlamaParse
 
 
 
 
requirements.txt CHANGED
@@ -5,12 +5,9 @@ camelot-py
5
  opencv-python-headless
6
  ghostscript
7
  pypdf
8
- unstructured
9
  pdf2image
10
- unstructured-inference
11
  pytesseract
12
  pikepdf
13
- unstructured-pytesseract
14
  joblib
15
  llama-parse
16
  python-dotenv
 
5
  opencv-python-headless
6
  ghostscript
7
  pypdf
 
8
  pdf2image
 
9
  pytesseract
10
  pikepdf
 
11
  joblib
12
  llama-parse
13
  python-dotenv