Added example data files. Greatly revised the CLI for redaction, deduplication, and AWS Textract batch calls. Various minor fixes and package updates.
d60759d
# Packaging metadata and developer-tool configuration for doc_redaction.

# Build backend: setuptools (>=61.0 is the first release that reads the
# [project] table from pyproject.toml).
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "doc_redaction"
version = "1.1.0"
description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface"
readme = "README.md"
requires-python = ">=3.10"
# Runtime dependencies are pinned to exact versions. The two entries using
# "name @ URL" are direct-URL requirements fetched from GitHub releases
# rather than from the package index.
dependencies = [
    "pdfminer.six==20250506",
    "pdf2image==1.17.0",
    "pymupdf==1.26.3",
    "opencv-python==4.12.0.88",
    # presidio_analyzer and presidio_anonymizer are kept on the same release.
    "presidio_analyzer==2.2.359",
    "presidio_anonymizer==2.2.359",
    "presidio-image-redactor==0.0.57",
    "pikepdf==9.10.2",
    "pandas==2.3.1",
    "scikit-learn==1.7.1",
    "spacy==3.8.7",
    # Direct URL dependency for spacy model
    "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
    "gradio==5.46.1",
    "boto3==1.40.31",
    "pyarrow==21.0.0",
    "openpyxl==3.1.5",
    "Faker==37.5.3",
    "python-levenshtein==0.27.1",
    "spaczz==0.6.1",
    # Direct URL dependency for gradio_image_annotator wheel
    "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
    "rapidfuzz==3.13.0",
    "python-dotenv==1.0.1",
    "awslambdaric==3.1.1",
    "python-docx==1.2.0",
    "paddlepaddle==3.1.0",
    "paddleocr==3.1.1",
    "polars==1.33.1",
]

[project.urls]
Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
Repository = "https://github.com/seanpedrick-case/doc_redaction"

# Extras installed with `pip install .[dev]`.
[project.optional-dependencies]
dev = ["pytest"]

# Configuration for Ruff linter:
[tool.ruff]
# Same limit as the Black configuration in this file so the two tools agree.
line-length = 88

# Rule selection belongs under `lint`; the top-level `select` key is
# deprecated by Ruff.
[tool.ruff.lint]
# E = pycodestyle errors, F = Pyflakes, I = isort (import ordering).
select = ["E", "F", "I"]

# Configuration for a Black formatter:
[tool.black]
line-length = 88
# Lowest Python version supported, per requires-python above.
target-version = ["py310"]