File size: 2,832 Bytes
bb3db47
 
0b2df0d
32cf7ad
bb3db47
088d472
1c1a2e4
bb3db47
 
 
94af959
49c1832
32cf7ad
bb3db47
32cf7ad
49c1832
 
bb3db47
 
 
 
 
 
 
 
 
 
 
49c1832
bb3db47
 
 
 
 
 
 
49c1832
bb3db47
bea9095
 
 
49c1832
bb3db47
49c1832
088d472
e199664
 
bb3db47
 
 
 
 
 
 
49c1832
 
bb3db47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94af959
bb3db47
49c1832
bb3db47
e199664
bea9095
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import re
from typing import Tuple, Dict
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase.pdfmetrics import stringWidth
from io import BytesIO

# E-mail-like token: a local part of letters, digits and "._%+-", an "@", then
# a domain of letters, digits, dots and hyphens ending in "." plus at least two
# ASCII letters.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
# Phone-like digit run: an optional country code ("+" optional, 1-3 digits), an
# optional parenthesised group of 2-4 digits, then three digit groups of 2-4,
# 2-4 and 3-4 digits. Groups may be separated by a single space or hyphen.
# The pattern has no word-boundary anchors, so a match may start or end inside
# a longer digit sequence.
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
# Name hint: group 1 is the label "氏名" with an optional colon and trailing
# whitespace; group 2 is the run of non-whitespace characters that follows.
# NOTE(review): re.IGNORECASE looks redundant here, since the pattern contains
# no cased letters - confirm before removing.
NAME_HINT_RE = re.compile(r"(氏名[::]?\s*)(\S+)", re.IGNORECASE)

def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """
    Ultra-lightweight anonymization of free text.

    Every e-mail match and every phone match is replaced with
    ``[REDACTED_EMAIL]`` / ``[REDACTED_PHONE]``, and the value following the
    first "氏名" label is replaced with ``[REDACTED_NAME]``.

    Returns a tuple ``(redacted_text, mapping)`` where ``mapping`` maps each
    original matched string to the placeholder that replaced it.
    """
    replacements: Dict[str, str] = {}

    def _mask(kind, found):
        # Remember what was hidden and hand back the placeholder to insert.
        placeholder = f"[REDACTED_{kind}]"
        replacements[found] = placeholder
        return placeholder

    redacted = text
    # E-mails are masked first, so the phone pattern runs on text that no
    # longer contains them.
    for kind, regex in (("EMAIL", EMAIL_RE), ("PHONE", PHONE_RE)):
        redacted = regex.sub(
            lambda hit, kind=kind: _mask(kind, hit.group(0)),
            redacted,
        )

    # Name hint (e.g. "氏名: 山田太郎"): keep the label, hide only the value,
    # and touch only the first occurrence.
    name_mask = "[REDACTED_NAME]"

    def _hide_name(hit):
        label, name = hit.group(1), hit.group(2)
        replacements[name] = name_mask
        return label + name_mask

    redacted = NAME_HINT_RE.sub(_hide_name, redacted, count=1)

    return redacted, replacements


def _split_to_width(word, max_width, font_name, font_size):
    """Cut *word* into consecutive pieces that each fit within *max_width*.

    Every piece holds at least one character, so the loop always advances even
    if a single glyph is wider than the available width.
    """
    pieces = []
    start = 0
    while start < len(word):
        end = start + 1
        while (
            end < len(word)
            and stringWidth(word[start:end + 1], font_name, font_size) <= max_width
        ):
            end += 1
        pieces.append(word[start:end])
        start = end
    return pieces


def _wrap_to_width(line, max_width, font_name, font_size):
    """Greedily wrap *line* at single spaces into rows no wider than *max_width*.

    Returns a list of row strings; it is empty when *line* is empty or holds
    only spaces. A word that is too wide on its own is hard-split.
    """
    rows = []
    current = ""
    for word in line.split(" "):
        trial = (current + " " + word).strip()
        if stringWidth(trial, font_name, font_size) <= max_width:
            current = trial
            continue
        # The row is full: flush it, but never emit an empty row just because
        # the very first word was already too wide.
        if current:
            rows.append(current)
        current = word
        if word and stringWidth(word, font_name, font_size) > max_width:
            # Unbroken runs (long tokens, text without spaces such as
            # Japanese) would overflow the right margin; break them by width.
            *full_rows, current = _split_to_width(
                word, max_width, font_name, font_size
            )
            rows.extend(full_rows)
    if current:
        rows.append(current)
    return rows


def render_anonymized_pdf(
    text: str,
    *,
    font_name: str = "Helvetica",
    font_size: float = 11,
    line_height: float = 14,
) -> bytes:
    """
    Minimal PDF renderer: flow plain text onto A4 pages with ReportLab only.

    Each input line is wrapped at spaces to the printable width (40pt margins
    on every side). Words wider than the printable width are split so nothing
    runs past the right margin. Empty and whitespace-only lines each advance
    one row. A new page is started only when another row actually has to be
    drawn, so text that ends at the bottom of a page gets no trailing blank
    page.

    Args:
        text: Text to render; split into lines with ``str.splitlines``.
        font_name: Name of a font known to ReportLab. The default Helvetica is
            a standard Latin font; to render Japanese or other CJK text,
            register a suitable font with ReportLab first and pass its name.
        font_size: Font size in points.
        line_height: Vertical distance between consecutive rows, in points.

    Returns:
        The finished PDF document as bytes. Empty *text* yields a single
        blank page.
    """
    buf = BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    page_width, page_height = A4

    left_margin = 40
    right_margin = 40
    top_margin = 40
    bottom_margin = 40
    top_y = page_height - top_margin
    max_width = page_width - left_margin - right_margin

    y = top_y
    for line in text.splitlines():
        # An empty wrap result means a blank line: still consume one row.
        rows = _wrap_to_width(line, max_width, font_name, font_size) or [""]
        for row in rows:
            # Break lazily, right before a row needs the space.
            if y < bottom_margin:
                c.showPage()
                y = top_y
            if row:
                # Set the font for every row: ReportLab forgets canvas state
                # (including the font) when a new page is started.
                c.setFont(font_name, font_size)
                c.drawString(left_margin, y, row)
            y -= line_height

    c.showPage()
    c.save()
    return buf.getvalue()