Spaces:
Sleeping
Sleeping
Update pipelines/anonymize.py
Browse files
- pipelines/anonymize.py +3 -4
pipelines/anonymize.py
CHANGED
|
@@ -4,6 +4,7 @@ from reportlab.lib.pagesizes import A4
|
|
| 4 |
from reportlab.pdfgen import canvas
|
| 5 |
from reportlab.lib.units import mm
|
| 6 |
|
|
|
|
| 7 |
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
|
| 8 |
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
|
| 9 |
NAME_HINT = re.compile(r"^(氏名|Name)[::]?\s*(.+)$", re.MULTILINE)
|
|
@@ -27,7 +28,6 @@ def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
|
|
| 27 |
out = _mask_all(EMAIL_RE, "EMAIL", out)
|
| 28 |
out = _mask_all(PHONE_RE, "TEL", out)
|
| 29 |
|
| 30 |
-
# 名前行の簡易マスク
|
| 31 |
for m in NAME_HINT.finditer(text):
|
| 32 |
full = m.group(0)
|
| 33 |
name = m.group(2).strip()
|
|
@@ -43,8 +43,7 @@ def render_anonymized_pdf(text: str) -> bytes:
|
|
| 43 |
buf = BytesIO()
|
| 44 |
c = canvas.Canvas(buf, pagesize=A4)
|
| 45 |
width, height = A4
|
| 46 |
-
|
| 47 |
-
line_height = 6 * mm
|
| 48 |
x = 15 * mm
|
| 49 |
y = height - 20 * mm
|
| 50 |
|
|
@@ -53,7 +52,7 @@ def render_anonymized_pdf(text: str) -> bytes:
|
|
| 53 |
c.showPage()
|
| 54 |
y = height - 20 * mm
|
| 55 |
c.drawString(x, y, line[:2000])
|
| 56 |
-
y -=
|
| 57 |
|
| 58 |
c.save()
|
| 59 |
return buf.getvalue()
|
|
|
|
| 4 |
from reportlab.pdfgen import canvas
|
| 5 |
from reportlab.lib.units import mm
|
| 6 |
|
| 7 |
+
|
| 8 |
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
|
| 9 |
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
|
| 10 |
NAME_HINT = re.compile(r"^(氏名|Name)[::]?\s*(.+)$", re.MULTILINE)
|
|
|
|
| 28 |
out = _mask_all(EMAIL_RE, "EMAIL", out)
|
| 29 |
out = _mask_all(PHONE_RE, "TEL", out)
|
| 30 |
|
|
|
|
| 31 |
for m in NAME_HINT.finditer(text):
|
| 32 |
full = m.group(0)
|
| 33 |
name = m.group(2).strip()
|
|
|
|
| 43 |
buf = BytesIO()
|
| 44 |
c = canvas.Canvas(buf, pagesize=A4)
|
| 45 |
width, height = A4
|
| 46 |
+
line_h = 6 * mm
|
|
|
|
| 47 |
x = 15 * mm
|
| 48 |
y = height - 20 * mm
|
| 49 |
|
|
|
|
| 52 |
c.showPage()
|
| 53 |
y = height - 20 * mm
|
| 54 |
c.drawString(x, y, line[:2000])
|
| 55 |
+
y -= line_h
|
| 56 |
|
| 57 |
c.save()
|
| 58 |
return buf.getvalue()
|