"""Lightweight text anonymization and a minimal ReportLab text-to-PDF renderer."""

import re
from io import BytesIO
from typing import Dict, List, Tuple

from reportlab.lib.pagesizes import A4
from reportlab.pdfbase.pdfmetrics import stringWidth
from reportlab.pdfgen import canvas

EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
PHONE_RE = re.compile(
    r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}"
)
# Group 1 is the "氏名" label with an optional ASCII or full-width colon and any
# following whitespace; group 2 is the first whitespace-delimited token after it.
NAME_HINT_RE = re.compile(r"(氏名[::]?\s*)(\S+)", re.IGNORECASE)


def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """Mask e-mail addresses, phone-like numbers and a labelled name in *text*.

    Every match of ``EMAIL_RE`` becomes ``[REDACTED_EMAIL]`` and every match of
    ``PHONE_RE`` becomes ``[REDACTED_PHONE]``. Only the first name hint
    (e.g. ``"氏名: 山田太郎"``) is processed: the label is kept and the token
    after it becomes ``[REDACTED_NAME]``. Only the first whitespace-delimited
    token after the label is masked, so a name written with an inner space is
    masked only up to that space.

    Args:
        text: The raw text to anonymize.

    Returns:
        A ``(anonymized_text, mapping)`` tuple where ``mapping`` maps each
        original substring that was masked to the placeholder that replaced it.
    """
    mapping: Dict[str, str] = {}

    def _mask_all(pattern, label, source):
        token = f"[REDACTED_{label}]"

        def _replace(match):
            mapping[match.group(0)] = token
            return token

        return pattern.sub(_replace, source)

    # E-mails are masked before phone numbers so digits inside an address are
    # already gone by the time the phone pattern runs.
    out = _mask_all(EMAIL_RE, "EMAIL", text)
    out = _mask_all(PHONE_RE, "PHONE", out)

    def _mask_name(match):
        token = "[REDACTED_NAME]"
        mapping[match.group(2)] = token
        # Keep the label (group 1) so the document structure stays readable.
        return match.group(1) + token

    out = NAME_HINT_RE.sub(_mask_name, out, count=1)
    return out, mapping


def _wrap_line(
    line: str, max_width: float, font_name: str, font_size: float
) -> List[str]:
    """Split *line* into rows that each fit within *max_width* points.

    Words are separated on single spaces and greedily packed into rows; runs of
    spaces collapse. A token that is wider than *max_width* on its own (typical
    for text without spaces, such as Japanese) is broken between characters.
    Returns an empty list when *line* contains nothing but spaces.
    """
    rows: List[str] = []
    current = ""
    for word in line.split(" "):
        candidate = f"{current} {word}".strip()
        if stringWidth(candidate, font_name, font_size) <= max_width:
            current = candidate
            continue
        # Flush the pending row; skip it when empty so an over-wide first word
        # does not produce a spurious blank row.
        if current:
            rows.append(current)
        current = word
        # Break an over-wide token by characters, always consuming at least
        # one character so the loop terminates even for a tiny max_width.
        while (
            len(current) > 1
            and stringWidth(current, font_name, font_size) > max_width
        ):
            cut = 1
            while stringWidth(current[: cut + 1], font_name, font_size) <= max_width:
                cut += 1
            rows.append(current[:cut])
            current = current[cut:]
    if current:
        rows.append(current)
    return rows


def render_anonymized_pdf(
    text: str,
    *,
    font_name: str = "Helvetica",
    font_size: float = 11,
    line_height: float = 14,
) -> bytes:
    """Render *text* onto A4 pages with ReportLab and return the PDF bytes.

    The text is split with ``str.splitlines``; each line is word-wrapped to the
    printable width (40 pt margins on every side) and drawn top to bottom,
    starting a new page whenever the next row would fall below the bottom
    margin. Empty and whitespace-only lines advance by one row.

    Args:
        text: The text to render.
        font_name: Name of a font known to ReportLab. NOTE(review): the default
            Helvetica is a Latin font; for Japanese text the caller presumably
            needs to register and pass a CJK-capable font - confirm.
        font_size: Font size in points.
        line_height: Vertical distance between consecutive rows in points.

    Returns:
        The complete PDF document as bytes. At least one page is always
        emitted, even for empty *text*.
    """
    buf = BytesIO()
    pdf = canvas.Canvas(buf, pagesize=A4)
    page_width, page_height = A4

    left_margin = 40
    right_margin = 40
    top_margin = 40
    bottom_margin = 40

    max_width = page_width - left_margin - right_margin
    y = page_height - top_margin

    def _emit_row(content: str) -> None:
        """Place one row (possibly blank) at the cursor and move the cursor down."""
        nonlocal y
        # Break pages lazily, right before a row needs the space, so text that
        # ends exactly at the bottom of a page does not add a trailing blank page.
        if y < bottom_margin:
            pdf.showPage()
            y = page_height - top_margin
        if content:
            # Set the font on every draw: showPage() resets the canvas font.
            pdf.setFont(font_name, font_size)
            pdf.drawString(left_margin, y, content)
        y -= line_height

    for raw_line in text.splitlines():
        rows = _wrap_line(raw_line, max_width, font_name, font_size)
        # An empty or whitespace-only line still occupies one blank row.
        for row in rows or [""]:
            _emit_row(row)

    pdf.showPage()
    pdf.save()
    return buf.getvalue()