Minor package updates, cleaned up README and pyproject files related to installation. Updated example_config.env
2b56cca
# Build configuration: the package is built with the setuptools backend.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
# Core package metadata.
[project]
name = "doc_redaction"
version = "1.5.3"
description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface"
readme = "README.md"
# NOTE(review): "[email protected]" looks like an email-obfuscation placeholder
# left by a web scrape, not a real address - restore the actual contact email
# in both `authors` and `maintainers` before publishing.
authors = [
    { name = "Sean Pedrick-Case", email = "[email protected]" },
]
maintainers = [
    { name = "Sean Pedrick-Case", email = "[email protected]" },
]
# This licence type is required in order to use PyMuPDF.
license = { text = "AGPL-3.0-only" }
keywords = [
    "redaction",
    "pdf",
    "nlp",
    "documents",
    "document-processing",
    "gradio",
    "pii",
    "pii-detection",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Legal Industry",
    "Topic :: Text Processing :: General",
    "Topic :: Security :: Cryptography",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# Lowest supported interpreter; matches the oldest version listed in the classifiers.
requires-python = ">=3.10"
# Runtime dependencies. Every PyPI requirement is pinned to an exact version;
# two requirements are installed from direct URLs (GitHub release assets)
# rather than from an index.
dependencies = [
    "pdfminer.six==20251107",
    "pdf2image==1.17.0",
    "pymupdf==1.26.6",
    "bleach==6.3.0",
    "opencv-python==4.12.0.88",
    "presidio_analyzer==2.2.360",
    "presidio_anonymizer==2.2.360",
    "presidio-image-redactor==0.0.57",
    "pikepdf==9.11.0",
    "pandas==2.3.3",
    "scikit-learn==1.7.2",
    "spacy==3.8.8",
    # spaCy English model, fetched from the spacy-models GitHub release.
    "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
    "gradio==5.49.1",
    "boto3==1.40.72",
    "pyarrow==21.0.0",
    "openpyxl==3.1.5",
    "Faker==37.8.0",
    "python-levenshtein==0.27.1",
    "spaczz==0.6.1",
    # Gradio image annotation component, installed from a prebuilt wheel on GitHub.
    "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
    "rapidfuzz==3.14.1",
    "python-dotenv==1.0.1",
    "awslambdaric==3.1.1",
    "python-docx==1.2.0",
    "polars==1.35.2",
    "defusedxml==0.7.1",
    "numpy==2.2.6",
    "spaces==0.42.1",
]
# Optional extras. To install the app with PaddleOCR and VLM support, run one
# of the following from the repository root, in the intended Python environment:
#     pip install ".[paddle,vlm]"
#     uv pip install ".[paddle,vlm]"
# (quote the argument so the shell does not try to expand the square brackets).
# The GPU builds of Paddle and Torch are not pulled in by these extras and
# must be installed manually; see the notes on each extra below.
[project.optional-dependencies]
# Extras for testing.
dev = ["pytest"]
test = ["pytest", "pytest-cov"]

# Extra dependencies for PaddleOCR.
# For the GPU-accelerated build, install it manually:
#     pip install "paddlepaddle-gpu<=3.2.1" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
# (the quotes stop the shell from treating "<" as input redirection).
paddle = [
    "paddlepaddle>=3.0.0,<=3.2.1",
    "paddleocr==3.3.0",
]

# Extra dependencies for VLM models.
# For CUDA support, install torch manually with
#     --index-url https://download.pytorch.org/whl/cu126
# (cu126 is the same CUDA build as the paddlepaddle-gpu index noted above).
vlm = [
    "torch>=2.5.1,<=2.8.0",
    "torchvision>=0.20.1",
    "transformers==4.57.1",
    "accelerate==1.11.0",
]
# Links published with the package metadata.
[project.urls]
Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
Repository = "https://github.com/seanpedrick-case/doc_redaction"
# Console entry points: the `cli_redact` command calls `main` in the
# `cli_redact` module.
# NOTE(review): no setuptools module/package discovery settings are visible in
# this file - confirm that `cli_redact` is actually included in the built
# distribution, otherwise the installed command will fail to import.
[project.scripts]
cli_redact = "cli_redact:main"
# Configuration for the Ruff linter.
[tool.ruff]
# Same line length as the Black configuration in this file.
line-length = 88

[tool.ruff.lint]
# Enabled rule families: E, F and I.
select = ["E", "F", "I"]
ignore = [
    # line-too-long (handled with Black)
    "E501",
    # module-import-not-at-top-of-file (sometimes needed for conditional imports)
    "E402",
]

[tool.ruff.lint.per-file-ignores]
# Allow unused imports in __init__.py
"__init__.py" = ["F401"]
# Configuration for the Black formatter.
[tool.black]
# Same line length as the Ruff configuration in this file.
line-length = 88
target-version = ["py310"]
# Configuration for pytest.
[tool.pytest.ini_options]
# Each filter has the form "action:message:category:module"; the message and
# module fields are matched as regular expressions.
filterwarnings = [
    "ignore::DeprecationWarning:click.parser",
    "ignore::DeprecationWarning:weasel.util.config",
    # "builtin type ..." is the warning's message text, not a module name, so
    # it belongs in the message field (second position) to have any effect.
    "ignore:builtin type .* has no __module__ attribute:DeprecationWarning",
    "ignore::DeprecationWarning:websockets.legacy",
    "ignore::DeprecationWarning:websockets.server",
    "ignore::DeprecationWarning:spacy.cli._util",
    "ignore::DeprecationWarning:importlib._bootstrap",
]
# Test discovery: where to look and which names count as tests.
testpaths = ["test"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Default command-line options applied to every pytest run.
addopts = [
    "-v",
    "--tb=short",
    "--strict-markers",
    "--disable-warnings",
]