# PEP 517 build configuration: the packages a front end (pip, build) must
# install before it can build this project, and the backend it then calls.
# setuptools 61.0 is the first release that reads PEP 621 [project] metadata.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core distribution metadata (PEP 621).
[project]
name = "doc_redaction"
version = "1.6.2"
description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface"
readme = "README.md"
authors = [
    { name = "Sean Pedrick-Case", email = "spedrickcase@lambeth.gov.uk" },
]
maintainers = [
    { name = "Sean Pedrick-Case", email = "spedrickcase@lambeth.gov.uk" },
]
# NOTE(review): the bare SPDX-string form (`license = "AGPL-3.0-only"`, PEP 639)
# needs a much newer setuptools than the ">=61.0" floor in [build-system];
# the table form below works with that floor.
license = { text = "AGPL-3.0-only" }

# Search keywords shown on the package index page.
keywords = [
    "redaction",
    "pdf",
    "nlp",
    "documents",
    "document-processing",
    "gradio",
    "pii",
    "pii-detection",
]

# Trove classifiers. The per-version "Programming Language" entries are
# informational only; the enforced interpreter floor is `requires-python`.
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Legal Industry",
    "Topic :: Text Processing :: General",
    "Topic :: Security :: Cryptography",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
requires-python = ">=3.10"

# Runtime requirements (PEP 508 strings). Everything is pinned to an exact
# version except the two direct-URL entries, which are pinned by release URL.
# NOTE(review): public indexes such as PyPI reject uploads whose requirements
# use direct URL references — confirm how this distribution is published.
dependencies = [
    "pdfminer.six==20251107",
    "pdf2image==1.17.0",
    "pymupdf==1.26.6",
    "bleach==6.3.0",
    "opencv-python==4.12.0.88",
    "presidio_analyzer==2.2.360",
    "presidio_anonymizer==2.2.360",
    "presidio-image-redactor==0.0.57",
    "pikepdf==9.11.0",
    "pandas==2.3.3",
    "scikit-learn==1.7.2",
    "spacy==3.8.8",
    # spaCy model archive, fetched from the spacy-models GitHub release.
    "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
    "gradio==5.49.1",
    "boto3==1.40.72",
    "pyarrow==21.0.0",
    "openpyxl==3.1.5",
    "Faker==37.8.0",
    "python-levenshtein==0.27.1",
    "spaczz==0.6.1",
    # Wheel fetched from the gradio_image_annotator GitHub release.
    "gradio_image_annotation @ https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.3/gradio_image_annotation-0.3.3-py3-none-any.whl",
    "rapidfuzz==3.14.1",
    "python-dotenv==1.0.1",
    "awslambdaric==3.1.1",
    "python-docx==1.2.0",
    "polars==1.35.2",
    "defusedxml==0.7.1",
    "numpy==2.2.6",
    "spaces==0.42.1",
]

# Optional extras, selected at install time, e.g. `pip install ".[paddle]"`.
[project.optional-dependencies]
dev = ["pytest"]
test = ["pytest", "pytest-cov"]
# PaddlePaddle / PaddleOCR stack.
paddle = [
    "paddlepaddle>=3.0.0,<=3.2.1",
    "paddleocr==3.3.0",
    "pycocotools==2.0.10",
]
# PyTorch / Transformers stack.
vlm = [
    "torch>=2.5.1,<=2.8.0",
    "torchvision>=0.20.1",
    "transformers==4.57.2",
    "accelerate==1.11.0",
    "bitsandbytes==0.48.2",
    "sentencepiece==0.2.1",
]
# Same Gradio version as the core `dependencies` pin, with its `mcp` extra
# enabled; keep the two version numbers in step.
mcp = [
    "gradio[mcp]==5.49.1",
]

# Links displayed alongside the package metadata.
[project.urls]
Homepage = "https://seanpedrick-case.github.io/doc_redaction/"
Repository = "https://github.com/seanpedrick-case/doc_redaction"

# Console entry points: installing the package creates a `cli_redact` command
# that calls `main` from the `cli_redact` module.
[project.scripts]
cli_redact = "cli_redact:main"

# Ruff linter settings. The line length matches the [tool.black] setting.
[tool.ruff]
line-length = 88

[tool.ruff.lint]
# E = pycodestyle errors, F = Pyflakes, I = isort (import ordering).
select = ["E", "F", "I"]
ignore = [
    "E501",  # line too long
    "E402",  # module-level import not at top of file
]

[tool.ruff.lint.per-file-ignores]
# F401 = imported but unused; waived for package __init__ files.
"__init__.py" = ["F401"]

# Black formatter settings. `py310` corresponds to the `requires-python`
# floor of 3.10 declared under [project].
[tool.black]
line-length = 88
target-version = ["py310"]

# pytest configuration.
[tool.pytest.ini_options]
# Warning filters in Python's `action:message:category:module` form.
filterwarnings = [
    "ignore::DeprecationWarning:click.parser",
    "ignore::DeprecationWarning:weasel.util.config",
    # "builtin type" is the start of a warning message, not a module name, so
    # it belongs in the message field; in the module field it matches nothing.
    "ignore:builtin type:DeprecationWarning",
    "ignore::DeprecationWarning:websockets.legacy",
    "ignore::DeprecationWarning:websockets.server",
    "ignore::DeprecationWarning:spacy.cli._util",
    "ignore::DeprecationWarning:importlib._bootstrap",
]
# Test discovery: where to look and which names count as tests.
testpaths = ["test"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Options prepended to every pytest invocation.
addopts = [
    "-v",
    "--tb=short",
    "--strict-markers",  # unregistered @pytest.mark names are an error
    "--disable-warnings",  # hide the end-of-run warnings summary
]