seanpedrickcase committed on
Commit
69c2af9
·
1 Parent(s): 94e514b

Updated version numbers, minor text revision

Browse files
DocRedactApp_0.6.1.spec → DocRedactApp.spec RENAMED
@@ -43,7 +43,7 @@ exe = EXE(
43
  a.scripts,
44
  [],
45
  exclude_binaries=True,
46
- name='DocRedactApp_0.4.0',
47
  debug=False,
48
  bootloader_ignore_signals=False,
49
  strip=False,
@@ -62,5 +62,5 @@ coll = COLLECT(
62
  strip=False,
63
  upx=True,
64
  upx_exclude=[],
65
- name='DocRedactApp_0.6.1',
66
  )
 
43
  a.scripts,
44
  [],
45
  exclude_binaries=True,
46
+ name='DocRedactApp_0.6.2',
47
  debug=False,
48
  bootloader_ignore_signals=False,
49
  strip=False,
 
62
  strip=False,
63
  upx=True,
64
  upx_exclude=[],
65
+ name='DocRedactApp_0.6.2',
66
  )
README.md CHANGED
@@ -10,6 +10,8 @@ license: agpl-3.0
10
  ---
11
  # Document redaction
12
 
 
 
13
  Redact personally identifiable information (PII) from documents (pdf, images), open text, or tabular data (xlsx/csv/parquet). Please see the [User Guide](#user-guide) for a walkthrough on how to use the app. Below is a very brief overview.
14
 
15
  To identify text in documents, the 'local' text/OCR image analysis uses spacy/tesseract, and works ok for documents with typed text. If available, choose 'AWS Textract service' to redact more complex elements e.g. signatures or handwriting. Then, choose a method for PII identification. 'Local' is quick and gives good results if you are primarily looking for a custom list of terms to redact (see Redaction settings). If available, AWS Comprehend gives better results at a small cost.
 
10
  ---
11
  # Document redaction
12
 
13
+ version: 0.6.2
14
+
15
  Redact personally identifiable information (PII) from documents (pdf, images), open text, or tabular data (xlsx/csv/parquet). Please see the [User Guide](#user-guide) for a walkthrough on how to use the app. Below is a very brief overview.
16
 
17
  To identify text in documents, the 'local' text/OCR image analysis uses spacy/tesseract, and works ok for documents with typed text. If available, choose 'AWS Textract service' to redact more complex elements e.g. signatures or handwriting. Then, choose a method for PII identification. 'Local' is quick and gives good results if you are primarily looking for a custom list of terms to redact (see Redaction settings). If available, AWS Comprehend gives better results at a small cost.
how_to_create_exe_dist.txt CHANGED
@@ -16,7 +16,7 @@ NOTE: for ensuring that spaCy models are loaded into the program correctly in re
16
 
17
  9. Run the following (This helped me: https://github.com/pyinstaller/pyinstaller/issues/8108):
18
 
19
- a) In command line: pyi-makespec --additional-hooks-dir="build_deps" --add-data "tesseract/:tesseract/" --add-data "poppler/poppler-24.02.0/:poppler/poppler-24.02.0/" --collect-data=gradio_client --collect-data=gradio --hidden-import=gradio_image_annotation --collect-data=gradio_image_annotation --collect-all=gradio_image_annotation --hidden-import pyarrow.vendored.version --hidden-import pydicom.encoders --hidden-import=safehttpx --collect-all=safehttpx --hidden-import=presidio_analyzer --collect-all=presidio_analyzer --hidden-import=presidio_anonymizer --collect-all=presidio_anonymizer --hidden-import=presidio_image_redactor --collect-all=presidio_image_redactor --name DocRedactApp_0.4.0 app.py
20
 
21
  # Add --onefile to the above if you would like everything packaged as a single exe, although this will need to be extracted upon starting the app, slowing down initialisation time significantly.
22
 
@@ -32,7 +32,7 @@ a = Analysis(
32
 
33
  hook-presidio-image-redactor.py
34
 
35
- c) Back in command line, run this: pyinstaller --clean --noconfirm DocRedactApp_0.4.0.spec
36
 
37
 
38
  9. A 'dist' folder will be created with the executable inside along with all dependencies ('dist\redaction').
 
16
 
17
  9. Run the following (This helped me: https://github.com/pyinstaller/pyinstaller/issues/8108):
18
 
19
+ a) In command line: pyi-makespec --additional-hooks-dir="build_deps" --add-data "tesseract/:tesseract/" --add-data "poppler/poppler-24.02.0/:poppler/poppler-24.02.0/" --collect-data=gradio_client --collect-data=gradio --hidden-import=gradio_image_annotation --collect-data=gradio_image_annotation --collect-all=gradio_image_annotation --hidden-import pyarrow.vendored.version --hidden-import pydicom.encoders --hidden-import=safehttpx --collect-all=safehttpx --hidden-import=presidio_analyzer --collect-all=presidio_analyzer --hidden-import=presidio_anonymizer --collect-all=presidio_anonymizer --hidden-import=presidio_image_redactor --collect-all=presidio_image_redactor --name DocRedactApp app.py
20
 
21
  # Add --onefile to the above if you would like everything packaged as a single exe, although this will need to be extracted upon starting the app, slowing down initialisation time significantly.
22
 
 
32
 
33
  hook-presidio-image-redactor.py
34
 
35
+ c) Back in command line, run this: pyinstaller --clean --noconfirm DocRedactApp.spec
36
 
37
 
38
  9. A 'dist' folder will be created with the executable inside along with all dependencies ('dist\redaction').
pyproject.toml CHANGED
@@ -3,11 +3,11 @@ requires = ["setuptools>=61.0", "wheel"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
- name = "doc_redaction" # Your application's name
7
- version = "0.6.1" # Your application's current version
8
- description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface" # A short description
9
- readme = "README.md" # Path to your project's README file
10
- requires-python = ">=3.10" # The minimum Python version required
11
 
12
  dependencies = [
13
  "pdfminer.six==20240706",
@@ -45,13 +45,12 @@ repository = "https://github.com/seanpedrick-case/doc_redaction"
45
  [project.optional-dependencies]
46
  dev = ["pytest"]
47
 
48
- # Optional: You can add configuration for tools used in your project under the [tool] section
49
- # For example, configuration for a linter like Ruff:
50
  [tool.ruff]
51
  line-length = 88
52
  select = ["E", "F", "I"]
53
 
54
- # Optional: Configuration for a formatter like Black:
55
  [tool.black]
56
  line-length = 88
57
  target-version = ['py310']
 
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
+ name = "doc_redaction"
7
+ version = "0.6.2"
8
+ description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
 
12
  dependencies = [
13
  "pdfminer.six==20240706",
 
45
  [project.optional-dependencies]
46
  dev = ["pytest"]
47
 
48
+ # Configuration for the Ruff linter:
 
49
  [tool.ruff]
50
  line-length = 88
51
  select = ["E", "F", "I"]
52
 
53
+ # Configuration for the Black formatter:
54
  [tool.black]
55
  line-length = 88
56
  target-version = ['py310']