seanpedrickcase committed on
Commit
47a3a80
·
1 Parent(s): f93e49c

Minor changes for cost codes, package updates. Added pyproject.toml file

Browse files
Files changed (4) hide show
  1. app.py +1 -1
  2. pyproject.toml +57 -0
  3. requirements.txt +2 -2
  4. tools/config.py +4 -3
app.py CHANGED
@@ -719,7 +719,7 @@ with app:
719
  print("Downloading cost codes from S3")
720
  app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_cost_codes_file, default_cost_codes_output_folder_location]).\
721
  success(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
722
- print("Successfully loaded cost codesc from S3")
723
  elif os.path.exists(COST_CODES_PATH):
724
  print("Loading cost codes from default cost codes path location:", COST_CODES_PATH)
725
  app.load(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
 
719
  print("Downloading cost codes from S3")
720
  app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_cost_codes_file, default_cost_codes_output_folder_location]).\
721
  success(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
722
+ print("Successfully loaded cost codes from S3")
723
  elif os.path.exists(COST_CODES_PATH):
724
  print("Loading cost codes from default cost codes path location:", COST_CODES_PATH)
725
  app.load(load_in_default_cost_codes, inputs = [default_cost_codes_output_folder_location, default_cost_code_textbox], outputs=[cost_code_dataframe, cost_code_dataframe_base, cost_code_choice_drop])
pyproject.toml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "doc_redaction" # Your application's name
7
+ version = "0.6.0" # Your application's current version
8
+ description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface" # A short description
9
+ readme = "README.md" # Path to your project's README file
10
+ requires-python = ">=3.10" # The minimum Python version required
11
+
12
+ dependencies = [
13
+ "pdfminer.six==20240706",
14
+ "pdf2image==1.17.0",
15
+ "pymupdf==1.25.3",
16
+ "opencv-python==4.10.0.84",
17
+ "presidio_analyzer==2.2.358",
18
+ "presidio_anonymizer==2.2.358",
19
+ "presidio-image-redactor==0.0.56",
20
+ "pikepdf==9.5.2",
21
+ "pandas==2.2.3",
22
+ "scikit-learn==1.6.1",
23
+ "spacy==3.8.4",
24
+ # Direct URL dependency for spacy model
25
+ "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
26
+ "gradio==5.27.1",
27
+ "boto3==1.38.4",
28
+ "pyarrow==19.0.1",
29
+ "openpyxl==3.1.5",
30
+ "Faker==36.1.1",
31
+ "python-levenshtein==0.26.1",
32
+ "spaczz==0.6.1",
33
+ # Direct URL dependency for gradio_image_annotator wheel
34
+ "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.2/gradio_image_annotation-0.3.2-py3-none-any.whl",
35
+ "rapidfuzz==3.12.1",
36
+ "python-dotenv==1.0.1",
37
+ "numpy==1.26.4",
38
+ "awslambdaric==3.0.1"
39
+ ]
40
+
41
+ [project.urls]
42
+ Homepage = "https://seanpedrick-case.github.io/doc_redaction/README.html"
43
+ repository = "https://github.com/seanpedrick-case/doc_redaction"
44
+
45
+ [project.optional-dependencies]
46
+ dev = ["pytest"]
47
+
48
+ # Optional: You can add configuration for tools used in your project under the [tool] section
49
+ # For example, configuration for a linter like Ruff:
50
+ [tool.ruff]
51
+ line-length = 88
52
+ select = ["E", "F", "I"]
53
+
54
+ # Optional: Configuration for a formatter like Black:
55
+ [tool.black]
56
+ line-length = 88
57
+ target-version = ['py310']
requirements.txt CHANGED
@@ -10,8 +10,8 @@ pandas==2.2.3
10
  scikit-learn==1.6.1
11
  spacy==3.8.4
12
  en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
13
- gradio==5.25.2
14
- boto3==1.37.29
15
  pyarrow==19.0.1
16
  openpyxl==3.1.5
17
  Faker==36.1.1
 
10
  scikit-learn==1.6.1
11
  spacy==3.8.4
12
  en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
13
+ gradio==5.27.1
14
+ boto3==1.38.4
15
  pyarrow==19.0.1
16
  openpyxl==3.1.5
17
  Faker==36.1.1
tools/config.py CHANGED
@@ -255,10 +255,11 @@ DEFAULT_COST_CODE = get_or_create_env_var('DEFAULT_COST_CODE', '')
255
 
256
  COST_CODES_PATH = get_or_create_env_var('COST_CODES_PATH', '') # 'config/COST_CENTRES.csv' # file should be a csv file with a single table in it that has two columns with a header. First column should contain cost codes, second column should contain a name or description for the cost code
257
 
258
- S3_COST_CODES_PATH = get_or_create_env_var('S3_COST_CODES_PATH', '') # COST_CENTRES.csv # This is a path within the DOCUMENT_REDACTION_BUCKET
259
-
 
260
  if COST_CODES_PATH: OUTPUT_COST_CODES_PATH = COST_CODES_PATH
261
- else: OUTPUT_COST_CODES_PATH = ''
262
 
263
  ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
264
 
 
255
 
256
  COST_CODES_PATH = get_or_create_env_var('COST_CODES_PATH', '') # 'config/COST_CENTRES.csv' # file should be a csv file with a single table in it that has two columns with a header. First column should contain cost codes, second column should contain a name or description for the cost code
257
 
258
+ S3_COST_CODES_PATH = get_or_create_env_var('S3_COST_CODES_PATH', '') # COST_CENTRES.csv # This is a path within the DOCUMENT_REDACTION_BUCKET
259
+
260
+ # A default path in case s3 cost code location is provided but no local cost code location given
261
  if COST_CODES_PATH: OUTPUT_COST_CODES_PATH = COST_CODES_PATH
262
+ else: OUTPUT_COST_CODES_PATH = 'config/cost_codes.csv'
263
 
264
  ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
265