document_redaction / pyproject.toml
seanpedrickcase's picture
Added example data files. Greatly revised CLI redaction for redaction, deduplication, and AWS Textract batch calls. Various minor fixes and package updates.
d60759d
raw
history blame
1.75 kB
# Build backend declaration: the package is built with setuptools (>= 61.0)
# and wheel, via the `setuptools.build_meta` backend.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
# Core package metadata and pinned runtime dependencies.
[project]
name = "doc_redaction"
version = "1.1.0"
description = "Redact PDF/image-based documents, or CSV/XLSX files using a Gradio-based GUI interface"
readme = "README.md"
requires-python = ">=3.10"
# Every requirement is pinned to an exact version; two packages are installed
# from direct URLs (GitHub release assets) rather than from an index.
dependencies = [
    "pdfminer.six==20250506",
    "pdf2image==1.17.0",
    "pymupdf==1.26.3",
    "opencv-python==4.12.0.88",
    "presidio_analyzer==2.2.359",
    "presidio_anonymizer==2.2.359",
    "presidio-image-redactor==0.0.57",
    "pikepdf==9.10.2",
    "pandas==2.3.1",
    "scikit-learn==1.7.1",
    "spacy==3.8.7",
    # spaCy `en_core_web_lg` model, fetched as a source archive from its GitHub release.
    "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
    "gradio==5.46.1",
    "boto3==1.40.31",
    "pyarrow==21.0.0",
    "openpyxl==3.1.5",
    "Faker==37.5.3",
    "python-levenshtein==0.27.1",
    "spaczz==0.6.1",
    # `gradio_image_annotation` wheel, published from the gradio_image_annotator repository.
    "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
    "rapidfuzz==3.13.0",
    "python-dotenv==1.0.1",
    "awslambdaric==3.1.1",
    "python-docx==1.2.0",
    "paddlepaddle==3.1.0",
    "paddleocr==3.1.1",
    "polars==1.33.1",
]
# Project links published in the package metadata. Labels are capitalised
# consistently so they render uniformly wherever the metadata is displayed.
[project.urls]
Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
Repository = "https://github.com/seanpedrick-case/doc_redaction"
# Optional extras: `dev` adds pytest for running the test suite.
[project.optional-dependencies]
dev = ["pytest"]
# Configuration for the Ruff linter.
[tool.ruff]
line-length = 88

# Rule selection belongs under `lint`; the top-level `select` key is deprecated.
# E = pycodestyle errors, F = Pyflakes, I = isort (import ordering).
[tool.ruff.lint]
select = ["E", "F", "I"]
# Black formatter settings: format for Python 3.10 syntax with 88-column lines.
[tool.black]
target-version = ["py310"]
line-length = 88