Spaces:
Sleeping
Sleeping
| import re | |
| from typing import Tuple, Dict | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.pdfbase.pdfmetrics import stringWidth | |
| from io import BytesIO | |
# E-mail address: local part, "@", domain labels, and an alphabetic TLD of
# at least two letters.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")

# Phone number: optional country code, optional parenthesised area code, then
# three digit groups that may be separated by a space or a hyphen.
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")

# Name hint: the label "氏名" (full name), an optional colon, optional
# whitespace, then a run of non-whitespace captured as the name.
# The colon class accepts both the ASCII ":" and the full-width ":" that is
# common in Japanese text. Without the full-width form, the colon itself was
# captured as the "name" and the real name was left unredacted.
NAME_HINT_RE = re.compile(r"(氏名[::]?\s*)(\S+)", re.IGNORECASE)
def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """
    Ultra-lightweight anonymizer.

    Every match of EMAIL_RE and PHONE_RE is replaced with
    "[REDACTED_EMAIL]" / "[REDACTED_PHONE]". After that, only the first
    match of NAME_HINT_RE has its captured name replaced with
    "[REDACTED_NAME]"; the label part of that match is kept as is.

    Returns a tuple of the redacted text and a dict that maps each
    original substring to the placeholder that replaced it.
    """
    replaced: Dict[str, str] = {}

    def _mask_all(regex, label, source):
        # Replace every match of `regex` and remember what was masked.
        token = f"[REDACTED_{label}]"

        def _on_match(match):
            replaced[match.group(0)] = token
            return token

        return regex.sub(_on_match, source)

    # E-mail addresses are masked before phone numbers.
    result = text
    for regex, label in ((EMAIL_RE, "EMAIL"), (PHONE_RE, "PHONE")):
        result = _mask_all(regex, label, result)

    # Name hint (e.g. "氏名: 山田太郎"): keep the label, mask the name.
    def _mask_name(match):
        label_part, name = match.group(1), match.group(2)
        token = "[REDACTED_NAME]"
        replaced[name] = token
        return label_part + token

    # count=1: only the first name hint in the text is masked.
    result = NAME_HINT_RE.sub(_mask_name, result, count=1)
    return result, replaced
def render_anonymized_pdf(text: str) -> bytes:
    """
    Minimal PDF renderer: flow plain text onto A4 pages with ReportLab.

    Each input line is word-wrapped on single spaces to fit between the
    left and right margins (40 on every side) and drawn in Helvetica 11
    with a line height of 14. Runs of spaces collapse and leading or
    trailing whitespace on a line is dropped. A word that is wider than
    the text area on its own is hard-broken character by character, so
    text without spaces no longer runs past the right margin. Empty and
    whitespace-only lines each advance one row.

    A new page is started only when another row is about to be placed
    below the bottom margin, so a page break is never followed by an
    empty trailing page. Empty input still yields a single blank page.

    Returns the bytes of the finished PDF document.
    """
    # NOTE(review): Helvetica is a standard Latin font; CJK text such as
    # Japanese names presumably will not render with it. Confirm, and
    # consider registering a CID/TrueType font if such text must be shown.
    font_name = "Helvetica"
    font_size = 11

    buf = BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    width, height = A4
    left_margin = 40
    right_margin = 40
    top_margin = 40
    bottom_margin = 40
    line_height = 14
    max_width = width - left_margin - right_margin
    y = height - top_margin

    def _fits(s: str) -> bool:
        # True when `s` fits inside the horizontal text area.
        return stringWidth(s, font_name, font_size) <= max_width

    def _wrap(line: str):
        # Split one input line into rows that each fit the text area.
        rows = []
        current = ""
        for word in line.split(" "):
            trial = (current + " " + word).strip()
            if _fits(trial):
                current = trial
                continue
            # Flush only a non-empty row, so an over-wide first word does
            # not produce a spurious blank row.
            if current:
                rows.append(current)
            if _fits(word):
                current = word
                continue
            # The word alone is too wide: hard-break it by characters.
            current = ""
            for ch in word:
                # `current and` guarantees progress even if a single
                # character were wider than the text area.
                if current and not _fits(current + ch):
                    rows.append(current)
                    current = ch
                else:
                    current += ch
        if current:
            rows.append(current)
        return rows

    def _emit(row: str) -> None:
        # Place one row (or a blank row) and move the cursor down.
        nonlocal y
        # Deferred page break: only start a page when a row needs it.
        if y < bottom_margin:
            c.showPage()
            y = height - top_margin
        if row:
            # showPage() resets the graphics state, so set the font on
            # every row, as the original code did.
            c.setFont(font_name, font_size)
            c.drawString(left_margin, y, row)
        y -= line_height

    for line in text.splitlines():
        # A line that wraps to nothing (empty or whitespace-only) still
        # takes up one blank row.
        for row in _wrap(line) or [""]:
            _emit(row)

    c.showPage()
    c.save()
    return buf.getvalue()