| import streamlit as st | |
| from transformers import pipeline | |
| import unicodedata | |
| import re | |
def contains_text(text):
    """Report whether *text* holds at least one Latin or Hebrew letter.

    Returns the ``re.Match`` for the first ASCII letter if there is one,
    otherwise the result of searching for a Hebrew letter (a ``re.Match``
    or ``None``). Callers are expected to use the result for its truthiness.
    """
    latin_hit = re.search('[A-Za-z]', text)
    if latin_hit:
        return latin_hit
    # No ASCII letter found: fall back to the Hebrew alphabet range.
    return re.search('[א-ת]', text)
# Single-character substitutions applied after NFC normalization.
_NORMALIZE_TABLE = str.maketrans({
    '\u05ba': '\u05b9',
    '\u05be': '-',
    '״': '"',
    "׳": "'",
})


def normalize(text):
    """Return *text* in NFC form with a few characters substituted.

    After Unicode NFC normalization, U+05BA is mapped to U+05B9, U+05BE to
    an ASCII hyphen, and the Hebrew gershayim/geresh marks to ASCII double
    and single quotes respectively.
    """
    composed = unicodedata.normalize('NFC', text)
    # One translate pass is equivalent to the chained replaces because no
    # replacement output is itself a key of the table.
    return composed.translate(_NORMALIZE_TABLE)
@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the TaatikNet text2text-generation pipeline.

    Streamlit re-executes the whole script on every widget interaction.
    Caching the pipeline as a resource means the model is constructed once
    and reused on later reruns instead of being reloaded each time.
    """
    return pipeline("text2text-generation", model='malper/taatiknet', device_map="auto")


# The cache's own spinner is disabled above so only this message is shown.
with st.spinner('Loading TaatikNet framework...'):
    pipe = _load_pipeline()
st.success('Loaded!')
text = st.text_area('Enter text and press ctrl/command+enter:')

# Keep only whitespace-separated tokens that contain a Latin or Hebrew
# letter, normalizing each one; empty input yields no tokens.
words = [normalize(token) for token in text.split() if contains_text(token)] if text else []

if words:
    # One pipeline call for all tokens, requesting five beam-search
    # candidates per token.
    outputs = pipe(words, max_length=200, num_beams=5, num_return_sequences=5)

    # Transpose per-token candidate lists so that entry k joins the k-th
    # candidate of every token into one space-separated line.
    texts = [
        ' '.join(candidate['generated_text'] for candidate in ranked)
        for ranked in zip(*outputs)
    ]

    top_choice, alternatives = texts[0], texts[1:]
    st.write(top_choice)
    st.write('Other options:')
    for alternative in alternatives:
        st.write(alternative)