Commit
·
47a3a80
1
Parent(s):
f93e49c
Minor changes for cost codes, package updates. Added pyproject.toml file
Browse files
- app.py +1 -1
- pyproject.toml +57 -0
- requirements.txt +2 -2
- tools/config.py +4 -3
app.py
CHANGED
@@ -719,7 +719,7 @@ with app:
|
|
719 |
print("Downloading cost codes from S3")
|
720 |
app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_cost_codes_file, default_cost_codes_output_folder_location]).\
|
721 |
success(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
722 |
-
print("Successfully loaded cost
|
723 |
elif os.path.exists(COST_CODES_PATH):
|
724 |
print("Loading cost codes from default cost codes path location:", COST_CODES_PATH)
|
725 |
app.load(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
|
|
719 |
print("Downloading cost codes from S3")
|
720 |
app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_cost_codes_file, default_cost_codes_output_folder_location]).\
|
721 |
success(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
722 |
+
print("Successfully loaded cost codes from S3")
|
723 |
elif os.path.exists(COST_CODES_PATH):
|
724 |
print("Loading cost codes from default cost codes path location:", COST_CODES_PATH)
|
725 |
app.load(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
pyproject.toml
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = ["setuptools>=61.0", "wheel"]
|
3 |
+
build-backend = "setuptools.build_meta"
|
4 |
+
|
5 |
+
[project]
|
6 |
+
name = "doc_redaction" # Your application's name
|
7 |
+
version = "0.6.0" # Your application's current version
|
8 |
+
description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface" # A short description
|
9 |
+
readme = "README.md" # Path to your project's README file
|
10 |
+
requires-python = ">=3.10" # The minimum Python version required
|
11 |
+
|
12 |
+
dependencies = [
|
13 |
+
"pdfminer.six==20240706",
|
14 |
+
"pdf2image==1.17.0",
|
15 |
+
"pymupdf==1.25.3",
|
16 |
+
"opencv-python==4.10.0.84",
|
17 |
+
"presidio_analyzer==2.2.358",
|
18 |
+
"presidio_anonymizer==2.2.358",
|
19 |
+
"presidio-image-redactor==0.0.56",
|
20 |
+
"pikepdf==9.5.2",
|
21 |
+
"pandas==2.2.3",
|
22 |
+
"scikit-learn==1.6.1",
|
23 |
+
"spacy==3.8.4",
|
24 |
+
# Direct URL dependency for the spaCy en_core_web_lg model
|
25 |
+
"en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
|
26 |
+
"gradio==5.27.1",
|
27 |
+
"boto3==1.38.4",
|
28 |
+
"pyarrow==19.0.1",
|
29 |
+
"openpyxl==3.1.5",
|
30 |
+
"Faker==36.1.1",
|
31 |
+
"python-levenshtein==0.26.1",
|
32 |
+
"spaczz==0.6.1",
|
33 |
+
# Direct URL dependency for the gradio_image_annotation wheel (hosted in the gradio_image_annotator repository)
|
34 |
+
"gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.2/gradio_image_annotation-0.3.2-py3-none-any.whl",
|
35 |
+
"rapidfuzz==3.12.1",
|
36 |
+
"python-dotenv==1.0.1",
|
37 |
+
"numpy==1.26.4",
|
38 |
+
"awslambdaric==3.0.1"
|
39 |
+
]
|
40 |
+
|
41 |
+
[project.urls]
|
42 |
+
Homepage = "https://seanpedrick-case.github.io/doc_redaction/README.html"
|
43 |
+
repository = "https://github.com/seanpedrick-case/doc_redaction"
|
44 |
+
|
45 |
+
[project.optional-dependencies]
|
46 |
+
dev = ["pytest"]
|
47 |
+
|
48 |
+
# Optional: You can add configuration for tools used in your project under the [tool] section
|
49 |
+
# For example, configuration for a linter like Ruff:
|
50 |
+
[tool.ruff]
|
51 |
+
line-length = 88
|
52 |
+
select = ["E", "F", "I"]
|
53 |
+
|
54 |
+
# Optional: Configuration for a formatter like Black:
|
55 |
+
[tool.black]
|
56 |
+
line-length = 88
|
57 |
+
target-version = ['py310']
|
requirements.txt
CHANGED
@@ -10,8 +10,8 @@ pandas==2.2.3
|
|
10 |
scikit-learn==1.6.1
|
11 |
spacy==3.8.4
|
12 |
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
|
13 |
-
gradio==5.
|
14 |
-
boto3==1.
|
15 |
pyarrow==19.0.1
|
16 |
openpyxl==3.1.5
|
17 |
Faker==36.1.1
|
|
|
10 |
scikit-learn==1.6.1
|
11 |
spacy==3.8.4
|
12 |
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
|
13 |
+
gradio==5.27.1
|
14 |
+
boto3==1.38.4
|
15 |
pyarrow==19.0.1
|
16 |
openpyxl==3.1.5
|
17 |
Faker==36.1.1
|
tools/config.py
CHANGED
@@ -255,10 +255,11 @@ DEFAULT_COST_CODE = get_or_create_env_var('DEFAULT_COST_CODE', '')
|
|
255 |
|
256 |
COST_CODES_PATH = get_or_create_env_var('COST_CODES_PATH', '') # 'config/COST_CENTRES.csv' # file should be a csv file with a single table in it that has two columns with a header. First column should contain cost codes, second column should contain a name or description for the cost code
|
257 |
|
258 |
-
S3_COST_CODES_PATH = get_or_create_env_var('S3_COST_CODES_PATH', '') # COST_CENTRES.csv # This is a path within the DOCUMENT_REDACTION_BUCKET
|
259 |
-
|
|
|
260 |
if COST_CODES_PATH: OUTPUT_COST_CODES_PATH = COST_CODES_PATH
|
261 |
-
else: OUTPUT_COST_CODES_PATH = ''
|
262 |
|
263 |
ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
|
264 |
|
|
|
255 |
|
256 |
COST_CODES_PATH = get_or_create_env_var('COST_CODES_PATH', '') # 'config/COST_CENTRES.csv' # file should be a csv file with a single table in it that has two columns with a header. First column should contain cost codes, second column should contain a name or description for the cost code
|
257 |
|
258 |
+
S3_COST_CODES_PATH = get_or_create_env_var('S3_COST_CODES_PATH', '') # COST_CENTRES.csv # This is a path within the DOCUMENT_REDACTION_BUCKET
|
259 |
+
|
260 |
+
# Default local output path for cost codes, used when COST_CODES_PATH is not set (e.g. an S3 cost code location is provided but no local cost code location is given)
|
261 |
if COST_CODES_PATH: OUTPUT_COST_CODES_PATH = COST_CODES_PATH
|
262 |
+
else: OUTPUT_COST_CODES_PATH = 'config/cost_codes.csv'
|
263 |
|
264 |
ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
|
265 |
|