""" uv sync wget https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx uv run gradio app.py """ from phonikud import phonemize, lexicon from phonikud.utils import remove_nikud import gradio as gr from phonikud_onnx import Phonikud from pathlib import Path default_text = """ הַדַּיָּיג נִצְמָד לְדֹופֶן הַסִּירָה בִּזְמַן הַסְּעָרָה. הִסְבַּרְתִּי לָהּ אֶת הַכֹּל, וְאָמַרְתִּי בְּדִיּוּק מָה קָרָה. הַיְּלָדִים אָהֲבוּ בִּמְיֻוחָד אֶת הַסִּיפּוּרִים הַלָּלוּ שֶׁהַמּוֹרָה הִקְרִיאָה. """.strip() def on_phonikud_toggle(use_phonikud): if not use_phonikud: return default_text return remove_nikud(default_text) css = """ .input textarea { font-size: 22px; padding: 15px; height: 200px; } .phonemes { background: var(--input-background-fill); } .phonemes { padding: 5px; min-height: 50px; } """ theme = gr.themes.Soft(font=[gr.themes.GoogleFont("Noto Sans Hebrew")]) phonikud = None commit = "unknown" model_path = Path("./phonikud-1.0.int8.onnx") if model_path.exists(): phonikud = Phonikud(str(model_path)) metadata = phonikud.get_metadata() commit = metadata.get("commit", "unknown") def on_submit(text: str, schema: str, use_phonikud: bool) -> str: diacritized = ( phonikud.add_diacritics( text, mark_matres_lectionis=lexicon.NIKUD_HASER_DIACRITIC ) if phonikud and use_phonikud else text ) phonemes = phonemize( diacritized, predict_stress=True, schema=schema, predict_vocal_shva=False ) if use_phonikud: return f"