Commit
·
47a3a80
1
Parent(s):
f93e49c
Minor changes for cost codes, package updates. Added pyproject.toml file
Browse files
- app.py +1 -1
- pyproject.toml +57 -0
- requirements.txt +2 -2
- tools/config.py +4 -3
app.py
CHANGED
|
@@ -719,7 +719,7 @@ with app:
|
|
| 719 |
print("Downloading cost codes from S3")
|
| 720 |
app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_cost_codes_file, default_cost_codes_output_folder_location]).\
|
| 721 |
success(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
| 722 |
-
print("Successfully loaded cost
|
| 723 |
elif os.path.exists(COST_CODES_PATH):
|
| 724 |
print("Loading cost codes from default cost codes path location:", COST_CODES_PATH)
|
| 725 |
app.load(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
|
|
|
| 719 |
print("Downloading cost codes from S3")
|
| 720 |
app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_cost_codes_file, default_cost_codes_output_folder_location]).\
|
| 721 |
success(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
| 722 |
+
print("Successfully loaded cost codes from S3")
|
| 723 |
elif os.path.exists(COST_CODES_PATH):
|
| 724 |
print("Loading cost codes from default cost codes path location:", COST_CODES_PATH)
|
| 725 |
app.load(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
|
pyproject.toml
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=61.0", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "doc_redaction" # Your application's name
|
| 7 |
+
version = "0.6.0" # Your application's current version
|
| 8 |
+
description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface" # A short description
|
| 9 |
+
readme = "README.md" # Path to your project's README file
|
| 10 |
+
requires-python = ">=3.10" # The minimum Python version required
|
| 11 |
+
|
| 12 |
+
dependencies = [
|
| 13 |
+
"pdfminer.six==20240706",
|
| 14 |
+
"pdf2image==1.17.0",
|
| 15 |
+
"pymupdf==1.25.3",
|
| 16 |
+
"opencv-python==4.10.0.84",
|
| 17 |
+
"presidio_analyzer==2.2.358",
|
| 18 |
+
"presidio_anonymizer==2.2.358",
|
| 19 |
+
"presidio-image-redactor==0.0.56",
|
| 20 |
+
"pikepdf==9.5.2",
|
| 21 |
+
"pandas==2.2.3",
|
| 22 |
+
"scikit-learn==1.6.1",
|
| 23 |
+
"spacy==3.8.4",
|
| 24 |
+
# Direct URL dependency for spacy model
|
| 25 |
+
"en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
|
| 26 |
+
"gradio==5.27.1",
|
| 27 |
+
"boto3==1.38.4",
|
| 28 |
+
"pyarrow==19.0.1",
|
| 29 |
+
"openpyxl==3.1.5",
|
| 30 |
+
"Faker==36.1.1",
|
| 31 |
+
"python-levenshtein==0.26.1",
|
| 32 |
+
"spaczz==0.6.1",
|
| 33 |
+
# Direct URL dependency for gradio_image_annotator wheel
|
| 34 |
+
"gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.2/gradio_image_annotation-0.3.2-py3-none-any.whl",
|
| 35 |
+
"rapidfuzz==3.12.1",
|
| 36 |
+
"python-dotenv==1.0.1",
|
| 37 |
+
"numpy==1.26.4",
|
| 38 |
+
"awslambdaric==3.0.1"
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
[project.urls]
|
| 42 |
+
Homepage = "https://seanpedrick-case.github.io/doc_redaction/README.html"
|
| 43 |
+
repository = "https://github.com/seanpedrick-case/doc_redaction"
|
| 44 |
+
|
| 45 |
+
[project.optional-dependencies]
|
| 46 |
+
dev = ["pytest"]
|
| 47 |
+
|
| 48 |
+
# Optional: You can add configuration for tools used in your project under the [tool] section
|
| 49 |
+
# For example, configuration for a linter like Ruff:
|
| 50 |
+
[tool.ruff]
|
| 51 |
+
line-length = 88
|
| 52 |
+
select = ["E", "F", "I"]
|
| 53 |
+
|
| 54 |
+
# Optional: Configuration for a formatter like Black:
|
| 55 |
+
[tool.black]
|
| 56 |
+
line-length = 88
|
| 57 |
+
target-version = ['py310']
|
requirements.txt
CHANGED
|
@@ -10,8 +10,8 @@ pandas==2.2.3
|
|
| 10 |
scikit-learn==1.6.1
|
| 11 |
spacy==3.8.4
|
| 12 |
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
|
| 13 |
-
gradio==5.
|
| 14 |
-
boto3==1.
|
| 15 |
pyarrow==19.0.1
|
| 16 |
openpyxl==3.1.5
|
| 17 |
Faker==36.1.1
|
|
|
|
| 10 |
scikit-learn==1.6.1
|
| 11 |
spacy==3.8.4
|
| 12 |
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
|
| 13 |
+
gradio==5.27.1
|
| 14 |
+
boto3==1.38.4
|
| 15 |
pyarrow==19.0.1
|
| 16 |
openpyxl==3.1.5
|
| 17 |
Faker==36.1.1
|
tools/config.py
CHANGED
|
@@ -255,10 +255,11 @@ DEFAULT_COST_CODE = get_or_create_env_var('DEFAULT_COST_CODE', '')
|
|
| 255 |
|
| 256 |
COST_CODES_PATH = get_or_create_env_var('COST_CODES_PATH', '') # 'config/COST_CENTRES.csv' # file should be a csv file with a single table in it that has two columns with a header. First column should contain cost codes, second column should contain a name or description for the cost code
|
| 257 |
|
| 258 |
-
S3_COST_CODES_PATH = get_or_create_env_var('S3_COST_CODES_PATH', '') # COST_CENTRES.csv # This is a path within the DOCUMENT_REDACTION_BUCKET
|
| 259 |
-
|
|
|
|
| 260 |
if COST_CODES_PATH: OUTPUT_COST_CODES_PATH = COST_CODES_PATH
|
| 261 |
-
else: OUTPUT_COST_CODES_PATH = ''
|
| 262 |
|
| 263 |
ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
|
| 264 |
|
|
|
|
| 255 |
|
| 256 |
COST_CODES_PATH = get_or_create_env_var('COST_CODES_PATH', '') # 'config/COST_CENTRES.csv' # file should be a csv file with a single table in it that has two columns with a header. First column should contain cost codes, second column should contain a name or description for the cost code
|
| 257 |
|
| 258 |
+
S3_COST_CODES_PATH = get_or_create_env_var('S3_COST_CODES_PATH', '') # COST_CENTRES.csv # This is a path within the DOCUMENT_REDACTION_BUCKET
|
| 259 |
+
|
| 260 |
+
# Fall back to a default local path when no local cost code location is given (e.g. only an S3 cost code location is provided)
|
| 261 |
if COST_CODES_PATH: OUTPUT_COST_CODES_PATH = COST_CODES_PATH
|
| 262 |
+
else: OUTPUT_COST_CODES_PATH = 'config/cost_codes.csv'
|
| 263 |
|
| 264 |
ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
|
| 265 |
|