Spaces:
Sleeping
Sleeping
Update pipelines/anonymize.py
Browse files- pipelines/anonymize.py +0 -6
pipelines/anonymize.py
CHANGED
|
@@ -4,15 +4,12 @@ from reportlab.lib.pagesizes import A4
|
|
| 4 |
from reportlab.pdfgen import canvas
|
| 5 |
from reportlab.lib.units import mm
|
| 6 |
|
| 7 |
-
|
| 8 |
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
|
| 9 |
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
|
| 10 |
NAME_HINT = re.compile(r"^(氏名|Name)[::]?\s*(.+)$", re.MULTILINE)
|
| 11 |
|
| 12 |
-
|
| 13 |
def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
|
| 14 |
mapping: Dict[str, str] = {}
|
| 15 |
-
|
| 16 |
def _mask_all(pattern: re.Pattern, label: str, s: str) -> str:
|
| 17 |
idx = 1
|
| 18 |
def _repl(m):
|
|
@@ -37,7 +34,6 @@ def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
|
|
| 37 |
|
| 38 |
return out, mapping
|
| 39 |
|
| 40 |
-
|
| 41 |
def render_anonymized_pdf(text: str) -> bytes:
|
| 42 |
from io import BytesIO
|
| 43 |
buf = BytesIO()
|
|
@@ -46,13 +42,11 @@ def render_anonymized_pdf(text: str) -> bytes:
|
|
| 46 |
line_h = 6 * mm
|
| 47 |
x = 15 * mm
|
| 48 |
y = height - 20 * mm
|
| 49 |
-
|
| 50 |
for line in text.splitlines():
|
| 51 |
if y < 20 * mm:
|
| 52 |
c.showPage()
|
| 53 |
y = height - 20 * mm
|
| 54 |
c.drawString(x, y, line[:2000])
|
| 55 |
y -= line_h
|
| 56 |
-
|
| 57 |
c.save()
|
| 58 |
return buf.getvalue()
|
|
|
|
| 4 |
from reportlab.pdfgen import canvas
|
| 5 |
from reportlab.lib.units import mm
|
| 6 |
|
|
|
|
| 7 |
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
|
| 8 |
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
|
| 9 |
NAME_HINT = re.compile(r"^(氏名|Name)[::]?\s*(.+)$", re.MULTILINE)
|
| 10 |
|
|
|
|
| 11 |
def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
|
| 12 |
mapping: Dict[str, str] = {}
|
|
|
|
| 13 |
def _mask_all(pattern: re.Pattern, label: str, s: str) -> str:
|
| 14 |
idx = 1
|
| 15 |
def _repl(m):
|
|
|
|
| 34 |
|
| 35 |
return out, mapping
|
| 36 |
|
|
|
|
| 37 |
def render_anonymized_pdf(text: str) -> bytes:
|
| 38 |
from io import BytesIO
|
| 39 |
buf = BytesIO()
|
|
|
|
| 42 |
line_h = 6 * mm
|
| 43 |
x = 15 * mm
|
| 44 |
y = height - 20 * mm
|
|
|
|
| 45 |
for line in text.splitlines():
|
| 46 |
if y < 20 * mm:
|
| 47 |
c.showPage()
|
| 48 |
y = height - 20 * mm
|
| 49 |
c.drawString(x, y, line[:2000])
|
| 50 |
y -= line_h
|
|
|
|
| 51 |
c.save()
|
| 52 |
return buf.getvalue()
|