Commit
·
3d18b9d
1
Parent(s):
6635049
Fix config file reference: import MAX_TABLE_ROWS from tools.config
Browse files
tools/find_duplicate_tabular.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
11 |
from tools.helper_functions import OUTPUT_FOLDER, read_file
|
12 |
from tools.data_anonymise import initial_clean
|
13 |
from tools.load_spacy_model_custom_recognisers import nlp
|
14 |
-
from tools.config import DO_INITIAL_TABULAR_DATA_CLEAN, REMOVE_DUPLICATE_ROWS, MAX_SIMULTANEOUS_FILES,
|
15 |
|
16 |
if REMOVE_DUPLICATE_ROWS == "True": REMOVE_DUPLICATE_ROWS = True
|
17 |
else: REMOVE_DUPLICATE_ROWS = False
|
@@ -140,8 +140,8 @@ def find_duplicate_cells_in_tabular_data(
|
|
140 |
# If sheet was successfully_loaded
|
141 |
if not temp_df.empty:
|
142 |
|
143 |
-
if temp_df.shape[0] >
|
144 |
-
out_message = f"Number of rows in {file_path} for sheet {sheet_name} is greater than {
|
145 |
print(out_message)
|
146 |
raise Exception(out_message)
|
147 |
|
@@ -161,8 +161,8 @@ def find_duplicate_cells_in_tabular_data(
|
|
161 |
else:
|
162 |
temp_df = read_file(file_path)
|
163 |
|
164 |
-
if temp_df.shape[0] >
|
165 |
-
out_message = f"Number of rows in {file_path} is greater than {
|
166 |
print(out_message)
|
167 |
raise Exception(out_message)
|
168 |
|
|
|
11 |
from tools.helper_functions import OUTPUT_FOLDER, read_file
|
12 |
from tools.data_anonymise import initial_clean
|
13 |
from tools.load_spacy_model_custom_recognisers import nlp
|
14 |
+
from tools.config import DO_INITIAL_TABULAR_DATA_CLEAN, REMOVE_DUPLICATE_ROWS, MAX_SIMULTANEOUS_FILES, MAX_TABLE_ROWS
|
15 |
|
16 |
if REMOVE_DUPLICATE_ROWS == "True": REMOVE_DUPLICATE_ROWS = True
|
17 |
else: REMOVE_DUPLICATE_ROWS = False
|
|
|
140 |
# If sheet was successfully_loaded
|
141 |
if not temp_df.empty:
|
142 |
|
143 |
+
if temp_df.shape[0] > MAX_TABLE_ROWS:
|
144 |
+
out_message = f"Number of rows in {file_path} for sheet {sheet_name} is greater than {MAX_TABLE_ROWS}. Please submit a smaller file."
|
145 |
print(out_message)
|
146 |
raise Exception(out_message)
|
147 |
|
|
|
161 |
else:
|
162 |
temp_df = read_file(file_path)
|
163 |
|
164 |
+
if temp_df.shape[0] > MAX_TABLE_ROWS:
|
165 |
+
out_message = f"Number of rows in {file_path} is greater than {MAX_TABLE_ROWS}. Please submit a smaller file."
|
166 |
print(out_message)
|
167 |
raise Exception(out_message)
|
168 |
|