Corin1998 committed on
Commit
32cf7ad
·
verified ·
1 Parent(s): efd9e64

Update pipelines/anonymize.py

Browse files
Files changed (1) hide show
  1. pipelines/anonymize.py +11 -31
pipelines/anonymize.py CHANGED
@@ -1,48 +1,28 @@
1
- import re
2
- from typing import Tuple, Dict
3
- from reportlab.lib.pagesizes import A4
4
  from reportlab.pdfgen import canvas
 
5
  from io import BytesIO
6
 
7
# Very simple anonymization: e-mail addresses, phone numbers, and lines that
# look like a "name" label.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
NAME_HINT = re.compile(r"^\s*(氏名|Name)\s*[::]?\s*(.+)$")


def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """Mask name lines, e-mail addresses and phone numbers in *text*.

    Processing order is: name lines, then e-mail addresses, then phone
    numbers.

    * A line matching ``NAME_HINT`` is replaced as a whole by
      ``"氏名: NAME_MASKED"`` and the original line is recorded in the
      mapping under the value ``"NAME_MASKED"``.
    * Each e-mail match is replaced by ``EMAIL_<n>@masked`` and each phone
      match by ``TEL_<n>``, where ``<n>`` is the current size of the mapping
      plus one (a single counter shared by every kind of entry).

    The text is split with ``str.splitlines`` and re-joined with ``"\\n"``,
    so a trailing newline is not kept.

    Returns:
        ``(masked_text, mapping)`` where ``mapping`` maps each original
        string to the token that was assigned to it.
    """
    mapping: Dict[str, str] = {}

    def _register(match, prefix, suffix=""):
        # The token number is derived from the mapping size at call time.
        token = f"{prefix}_{len(mapping) + 1}"
        mapping[match.group(0)] = token
        return token + suffix

    # Pass 1: replace whole lines that carry a name label.
    out_lines = []
    for raw in text.splitlines():
        hit = NAME_HINT.match(raw)
        if hit is None:
            out_lines.append(raw)
            continue
        mapping[hit.group(0)] = "NAME_MASKED"
        out_lines.append("氏名: NAME_MASKED")

    # Pass 2 and 3: e-mail addresses first, phone numbers afterwards.
    without_emails = EMAIL_RE.sub(
        lambda m: _register(m, "EMAIL", "@masked"), "\n".join(out_lines)
    )
    without_phones = PHONE_RE.sub(lambda m: _register(m, "TEL"), without_emails)
    return without_phones, mapping
35
 
36
def render_anonymized_pdf(text: str) -> bytes:
    """Draw *text* line by line onto A4 pages and return the PDF as bytes.

    Every line of ``text.splitlines()`` is drawn 40 points from the left
    edge, starting 40 points below the top of the page and moving down 14
    points per line. When the cursor drops below 40 points a new page is
    started. Each line is cut to its first 1000 characters as a guard
    against extremely long lines.
    """
    margin = 40
    line_step = 14
    max_chars = 1000  # per-line limit (guard against very long lines)
    _page_width, page_height = A4
    top_of_page = page_height - margin

    stream = BytesIO()
    pdf = canvas.Canvas(stream, pagesize=A4)

    cursor = top_of_page
    for row in text.splitlines():
        if cursor < margin:
            # Out of vertical space: finish this page and restart at the top.
            pdf.showPage()
            cursor = top_of_page
        pdf.drawString(margin, cursor, row[:max_chars])
        cursor -= line_step

    pdf.save()
    return stream.getvalue()
 
1
+ # 最低限の動作用スタブ。実運用は実名/連絡先のマスキング等を実装してください。
 
 
2
  from reportlab.pdfgen import canvas
3
+ from reportlab.lib.pagesizes import A4
4
  from io import BytesIO
5
 
 
 
 
 
6
 
7
def anonymize_text(text: str):
    """Mask names, e-mail addresses and phone numbers found in *text*.

    Three kinds of personal data are replaced with numbered placeholders:

    * the value after a ``氏名`` / ``Name`` label at the start of a line
      becomes ``[NAME_n]`` (the label and its separator are kept),
    * e-mail addresses become ``[EMAIL_n]``,
    * phone-number-like digit groups (at least seven digits, optionally
      separated by spaces or hyphens) become ``[TEL_n]``.

    The same original value always receives the same placeholder, and each
    kind has its own counter. Text that contains none of the above is
    returned unchanged, including its line endings.

    Args:
        text: Raw text that may contain personal data.

    Returns:
        A ``(masked_text, mapping)`` tuple, where ``mapping`` maps every
        original value that was masked to the placeholder that replaced it.
    """
    import re  # local import: this module's import block does not bring in `re`

    gap = r"[ \t\u3000]"      # horizontal whitespace, incl. ideographic space
    colon = r"[:\uFF1A]"      # ASCII or full-width colon
    # Group 1 = label + separator (kept), group 2 = the value to mask.
    # The label must be followed by a colon or by whitespace so that words
    # which merely start with "Name" are not treated as labels.
    name_re = re.compile(
        rf"^({gap}*(?:氏名|Name)(?:{gap}*{colon}{gap}*|{gap}+))([^\s:\uFF1A][^\r\n]*)",
        re.MULTILINE,
    )
    email_re = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
    phone_re = re.compile(
        r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}"
    )

    mapping = {}
    counters = {"NAME": 0, "EMAIL": 0, "TEL": 0}

    def placeholder(kind, value):
        # Reuse the existing placeholder so repeated values stay correlated.
        token = mapping.get(value)
        if token is None:
            counters[kind] += 1
            token = f"[{kind}_{counters[kind]}]"
            mapping[value] = token
        return token

    def mask_name(match):
        captured = match.group(2)
        value = captured.rstrip()
        trailing = captured[len(value):]  # keep trailing blanks untouched
        return match.group(1) + placeholder("NAME", value) + trailing

    # Names first (they are line-anchored), then e-mail addresses, then phone
    # numbers, so digits inside an address are never masked separately.
    masked = name_re.sub(mask_name, text)
    masked = email_re.sub(lambda m: placeholder("EMAIL", m.group(0)), masked)
    masked = phone_re.sub(lambda m: placeholder("TEL", m.group(0)), masked)
    return masked, mapping
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
def _split_to_width(line, max_width, measure):
    """Split *line* into consecutive pieces whose measured width fits *max_width*.

    ``measure`` is a callable returning the rendered width of a string. An
    empty line yields one empty piece so blank lines keep their vertical
    space; a single character wider than ``max_width`` is emitted on its own.
    """
    pieces = []
    current = []
    used = 0.0
    for ch in line:
        ch_width = measure(ch)
        if current and used + ch_width > max_width:
            pieces.append("".join(current))
            current = []
            used = 0.0
        current.append(ch)
        used += ch_width
    pieces.append("".join(current))
    return pieces


def render_anonymized_pdf(text: str) -> bytes:
    """Render *text* as a plain multi-page A4 PDF and return the PDF bytes.

    Each input line is drawn with a 40-point margin and a 14-point line
    step. Lines wider than the printable area are continued on the following
    line(s) instead of being cut off, so no input text is dropped. A new
    page is started whenever the cursor passes the bottom margin.

    Text that fits the Windows-1252 repertoire is drawn with Helvetica 12.
    Any other text (for example Japanese) is drawn with ReportLab's built-in
    ``HeiseiKakuGo-W5`` CID font, because Helvetica has no glyphs for it.

    Args:
        text: The (already anonymized) text to render.

    Returns:
        The bytes of the generated PDF document.
    """
    # Local imports: these ReportLab submodules are not in the module's
    # import block.
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont

    margin = 40
    line_step = 14
    font_size = 12
    page_width, page_height = A4
    usable_width = page_width - 2 * margin

    # Keep Helvetica for text it can encode; switch to a CJK-capable font
    # only when the text needs it.
    # NOTE(review): CID fonts are not embedded and rely on the PDF viewer's
    # Japanese font support — confirm this is acceptable for consumers.
    try:
        text.encode("cp1252")
        font_name = "Helvetica"
    except UnicodeEncodeError:
        font_name = "HeiseiKakuGo-W5"
        pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    buf = BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    c.setFont(font_name, font_size)

    def measure(fragment):
        return c.stringWidth(fragment, font_name, font_size)

    y = page_height - margin
    for line in text.splitlines():
        for piece in _split_to_width(line, usable_width, measure):
            if y < margin:
                c.showPage()
                # showPage() resets the graphics state, including the font.
                c.setFont(font_name, font_size)
                y = page_height - margin
            c.drawString(margin, y, piece)
            y -= line_step
    c.save()
    return buf.getvalue()