# Spaces: Running on Zero | File size: 2,233 Bytes | revision 0038229
from __future__ import annotations
import re
# Regex pattern -> replacement pairs. ``normalize_text`` feeds each pair to
# ``re.sub`` sequentially, in insertion order.
REPLACE_MAP: dict[str, str] = {
    # Tabs and literal "[n]" markers are dropped.
    r"\t": "",
    r"\[n\]": "",
    # NOTE(review): this removes ASCII spaces; confirm whether the ideographic
    # space (U+3000) was meant to be covered as well.
    r" ": "",
    # Decorative symbols that are removed outright.
    r"[;▼♀♂《》≪≫①②③④⑤⑥]": "",
    # Hyphen / dash / horizontal-bar look-alikes are removed.
    r"[\u02d7\u2010-\u2015\u2043\u2212\u23af\u23e4\u2500\u2501\u2e3a\u2e3b]": "",
    # Wave dashes become the long-vowel mark.
    r"[\uff5e\u301C]": "ー",
    # Fullwidth "?" (U+FF1F) and "!" (U+FF01) -> ASCII. They are spelled as
    # escapes on purpose: a bare "?" is not a valid regular expression
    # ("nothing to repeat") and would make every re.sub call raise, and a
    # literal fullwidth character is easily lost to width folding.
    r"\uff1f": "?",
    r"\uff01": "!",
    # Circle variants are unified; filled heart becomes outlined heart.
    r"[●◯〇]": "○",
    r"♥": "♡",
}
# Translation table mapping fullwidth Latin letters (U+FF21..U+FF3A and
# U+FF41..U+FF5A) to ASCII A-Z / a-z. Each fullwidth letter sits exactly
# 0xFEE0 code points above its ASCII counterpart, so the table is derived
# from the ASCII ranges.
FULLWIDTH_ALPHA_TO_HALFWIDTH = str.maketrans(
    {
        chr(ascii_cp + 0xFEE0): chr(ascii_cp)
        for ascii_cp in (*range(0x41, 0x5B), *range(0x61, 0x7B))
    }
)
# Halfwidth katakana occupy the contiguous range U+FF66 (wo) .. U+FF9D (n),
# in the same order as the fullwidth string below. The halfwidth side is
# generated from code points rather than written as a literal so that it
# cannot silently collapse into its fullwidth twin (which would turn the
# table into an identity map). The halfwidth voiced / semi-voiced sound
# marks (U+FF9E, U+FF9F) are not part of this table.
_HALFWIDTH_KATAKANA_CHARS = "".join(map(chr, range(0xFF66, 0xFF9E)))
_FULLWIDTH_KATAKANA_CHARS = "ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン"
# Translation table for ``str.translate``: halfwidth katakana -> fullwidth.
HALFWIDTH_KATAKANA_TO_FULLWIDTH = str.maketrans(
    _HALFWIDTH_KATAKANA_CHARS, _FULLWIDTH_KATAKANA_CHARS
)
# Translation table mapping the fullwidth digits U+FF10..U+FF19 to ASCII
# "0".."9"; the n-th fullwidth digit is U+FF10 + n.
FULLWIDTH_DIGITS_TO_HALFWIDTH = str.maketrans(
    {chr(0xFF10 + digit): str(digit) for digit in range(10)}
)
def normalize_text(text: str) -> str:
    """Normalize text for TTS.

    The steps run in this order:

    1. Apply every ``REPLACE_MAP`` regex substitution.
    2. Convert fullwidth Latin letters and digits to ASCII, and halfwidth
       katakana to fullwidth.
    3. Collapse any run of three or more "…" into exactly two.
    4. Strip at most one layer of each enclosing bracket pair, checking the
       pairs in a fixed order.
    5. Remove all trailing "。".

    Args:
        text: The raw input string.

    Returns:
        The normalized string.
    """
    for pattern, replacement in REPLACE_MAP.items():
        text = re.sub(pattern, replacement, text)

    # The three tables cover disjoint code points, so their order is free.
    for table in (
        FULLWIDTH_ALPHA_TO_HALFWIDTH,
        FULLWIDTH_DIGITS_TO_HALFWIDTH,
        HALFWIDTH_KATAKANA_TO_FULLWIDTH,
    ):
        text = text.translate(table)

    text = re.sub(r"…{3,}", "……", text)

    # Each pair is tested once, in this order, so mixed nesting is unwrapped
    # only as far as the order allows. Fullwidth parentheses are written as
    # escapes so they stay distinct from the ASCII pair at the end; none of
    # the earlier steps converts fullwidth parentheses to ASCII.
    enclosing_pairs = (
        ("「", "」"),
        ("『", "』"),
        ("\uff08", "\uff09"),
        ("【", "】"),
        ("(", ")"),
    )
    for opening, closing in enclosing_pairs:
        if text.startswith(opening) and text.endswith(closing):
            text = text[1:-1]

    # rstrip is a no-op when there is no trailing "。", so no guard is needed.
    return text.rstrip("。")
|