Spaces:
Sleeping
Sleeping
File size: 2,832 Bytes
bb3db47 0b2df0d 32cf7ad bb3db47 088d472 1c1a2e4 bb3db47 94af959 49c1832 32cf7ad bb3db47 32cf7ad 49c1832 bb3db47 49c1832 bb3db47 49c1832 bb3db47 bea9095 49c1832 bb3db47 49c1832 088d472 e199664 bb3db47 49c1832 bb3db47 94af959 bb3db47 49c1832 bb3db47 e199664 bea9095 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import re
from typing import Tuple, Dict
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase.pdfmetrics import stringWidth
from io import BytesIO
# E-mail addresses: local part, "@", domain labels, and a TLD of 2+ letters.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")

# Phone-like digit runs: optional country code, optional parenthesised area
# code, then three digit groups separated by an optional space or hyphen.
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")

# Name hint: the label "氏名" (optionally followed by a colon and whitespace)
# and the next run of non-whitespace characters, which is taken as the name.
# Group 1 is the label part, group 2 is the name.
# The colon class accepts both the full-width ":" and the ASCII ":" so that
# the separator is never captured as part of the name in group 2.
NAME_HINT_RE = re.compile(r"(氏名[::]?\s*)(\S+)", re.IGNORECASE)
def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """Mask e-mail addresses, phone numbers and one labelled name in *text*.

    Every match of ``EMAIL_RE`` and then ``PHONE_RE`` is replaced with
    ``[REDACTED_EMAIL]`` / ``[REDACTED_PHONE]``.  After that, only the first
    match of ``NAME_HINT_RE`` is processed: its label (group 1) is kept and
    the name (group 2) becomes ``[REDACTED_NAME]``.

    Returns:
        A pair ``(masked_text, replacements)`` where ``replacements`` maps
        each original substring that was masked to the placeholder used.
    """
    replacements: Dict[str, str] = {}

    def _mask_all(regex, label, source):
        # The placeholder depends only on the label, so build it once.
        placeholder = f"[REDACTED_{label}]"

        def _record(match):
            replacements[match.group(0)] = placeholder
            return placeholder

        return regex.sub(_record, source)

    redacted = text
    # Order matters: e-mails are masked before the phone pattern runs.
    for regex, label in ((EMAIL_RE, "EMAIL"), (PHONE_RE, "PHONE")):
        redacted = _mask_all(regex, label, redacted)

    def _mask_name(match):
        # Keep the label, replace only the captured name.
        label_part, name = match.group(1), match.group(2)
        replacements[name] = "[REDACTED_NAME]"
        return label_part + "[REDACTED_NAME]"

    # Only the first name hint in the text is masked.
    redacted = NAME_HINT_RE.sub(_mask_name, redacted, count=1)
    return redacted, replacements
def render_anonymized_pdf(text: str) -> bytes:
    """Render plain text onto A4 pages with ReportLab and return the PDF bytes.

    Each input line is wrapped to the printable width (40pt margins on all
    sides, 11pt font, 14pt line height).  Wrapping prefers spaces; a single
    token that is wider than the printable width (typical for Japanese text,
    which has no spaces) is broken between characters instead of running off
    the page.  Empty and whitespace-only lines advance the cursor by one line.

    Text that is fully representable in cp1252 is drawn in Helvetica.  Any
    other text is drawn in ReportLab's built-in ``HeiseiKakuGo-W5`` CID font,
    because the standard Helvetica font has no CJK glyphs.

    Args:
        text: The text to render; lines are split with ``str.splitlines``.

    Returns:
        The complete PDF document as bytes (at least one page, even for
        empty input).
    """
    font_size = 11
    font_name = "Helvetica"
    try:
        text.encode("cp1252")
    except UnicodeEncodeError:
        # Imported lazily so Latin-only documents keep the original code path.
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.cidfonts import UnicodeCIDFont

        font_name = "HeiseiKakuGo-W5"
        pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    buf = BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    width, height = A4
    left_margin = 40
    right_margin = 40
    top_margin = 40
    bottom_margin = 40
    line_height = 14
    max_width = width - left_margin - right_margin
    y = height - top_margin

    def _fits(candidate: str) -> bool:
        return stringWidth(candidate, font_name, font_size) <= max_width

    def _emit(content: str) -> None:
        """Draw one physical line (or skip one if empty) and move down."""
        nonlocal y
        # Break the page lazily, right before it is needed, so a document
        # whose last line fills a page does not end with a blank page.
        if y < bottom_margin:
            c.showPage()
            y = height - top_margin
        if content:
            # The font is set before every draw, as the original code did,
            # so it is always in effect on a freshly started page.
            c.setFont(font_name, font_size)
            c.drawString(left_margin, y, content)
        y -= line_height

    def _draw_wrapped(line: str) -> None:
        if not line.strip():
            _emit("")
            return
        current = ""
        for word in line.split(" "):
            trial = (current + " " + word).strip()
            if _fits(trial):
                current = trial
                continue
            if current:
                _emit(current)
            if _fits(word):
                current = word
                continue
            # The token alone is too wide: fill lines character by character.
            current = ""
            for ch in word:
                if current and not _fits(current + ch):
                    _emit(current)
                    current = ch
                else:
                    current += ch
        if current:
            _emit(current)

    for line in text.splitlines():
        _draw_wrapped(line)

    c.showPage()
    c.save()
    return buf.getvalue()
|