"""Streamlit front end for translating Moroccan Darija text with an mBART model.

The page shows a text box, a sidebar with generation settings, a "Translate"
button, and a download button for the most recent translation.
"""

import streamlit as st
from transformers import MBartForConditionalGeneration, MBart50Tokenizer

# Checkpoint identifier handed to ``from_pretrained``.
# NOTE(review): the spelling "transaltion" is kept exactly as found, since the
# identifier has to match the published checkpoint name - confirm before
# "correcting" it.
model_path = "echarif/mBART_for_darija_transaltion"

# NOTE(review): the page icon, title and intro heading each contained a stray
# Thai digit where an emoji is expected, which looks like a mis-decoded emoji.
# A globe is used as a stand-in - swap in the intended emoji if it is known.
APP_ICON = "🌍"

# Key under which the latest translation is kept in ``st.session_state``.
TRANSLATION_KEY = "translation"

# Page configuration is issued before anything else that may emit Streamlit
# output (the cached loader below can display a spinner).
st.set_page_config(
    page_title="Darija to English Translator",
    page_icon=APP_ICON,
    layout="wide",
)


@st.cache_resource
def load_model(path: str):
    """Load the mBART model and its tokenizer, reusing them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the checkpoint would be reloaded each time. Requires a
    Streamlit release that provides ``st.cache_resource``.

    Args:
        path: Checkpoint identifier or local directory for ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = MBartForConditionalGeneration.from_pretrained(path)
    loaded_tokenizer = MBart50Tokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


model, tokenizer = load_model(model_path)


def translate_text(
    text: str,
    source: str,
    target: str,
    num_beams: int,
    max_len: int,
) -> str:
    """Translate ``text`` from language code ``source`` into ``target``.

    Args:
        text: Text to translate.
        source: Source language code assigned to the tokenizer (e.g. "ar_AR").
        target: Target language code whose id is forced as the first
            generated token (e.g. "en_XX").
        num_beams: Beam count passed to ``model.generate``.
        max_len: ``max_length`` passed to ``model.generate``.

    Returns:
        The decoded first generated sequence, special tokens removed.
    """
    # NOTE: the cached tokenizer object is reused by every rerun, and
    # assigning ``src_lang`` mutates it. Keep the assignment immediately
    # before the tokenizer call.
    tokenizer.src_lang = source
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    outputs = model.generate(
        **inputs,
        max_length=max_len,
        num_beams=num_beams,
        forced_bos_token_id=tokenizer.lang_code_to_id[target],
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# --- Page header ------------------------------------------------------------
st.title(f"Darija to English Translation App {APP_ICON}")
st.markdown(
    f"### Welcome to the Darija to English Translation App! {APP_ICON}\n"
    "This app uses **mBART** for translating Moroccan Darija to English "
    "with advanced NLP capabilities."
)

# --- Sidebar: generation settings --------------------------------------------
st.sidebar.header("Model Parameters")
beam_size = st.sidebar.slider("Beam Size", 1, 10, 5)
max_length = st.sidebar.slider("Max Length", 10, 200, 50)
src_lang = st.sidebar.selectbox("Source Language", ["ar_AR", "fr_XX"], index=0)
tgt_lang = st.sidebar.selectbox("Target Language", ["en_XX", "es_XX"], index=0)

# --- Main area: input on the left, result on the right -----------------------
col1, col2 = st.columns(2)

with col1:
    st.header("Input Text")
    input_text = st.text_area("Darija Text:", "")

with col2:
    st.header("Translation")
    if st.button("Translate"):
        # Drop the previous result first so a failed or empty attempt never
        # leaves a stale translation on screen.
        if TRANSLATION_KEY in st.session_state:
            del st.session_state[TRANSLATION_KEY]

        if input_text.strip():
            try:
                st.session_state[TRANSLATION_KEY] = translate_text(
                    input_text, src_lang, tgt_lang, beam_size, max_length
                )
            except Exception as e:
                # Top-level UI boundary: report the failure to the user
                # instead of letting the script run abort.
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter text to translate!")

    # Render from session state so the result survives reruns caused by other
    # widgets (sliders, the download button, ...).
    if TRANSLATION_KEY in st.session_state:
        st.success("Translation:")
        st.write(st.session_state[TRANSLATION_KEY])

# --- Download -----------------------------------------------------------------
# Session state (rather than probing ``locals()``) keeps the button available
# after the rerun that clicking it triggers.
if TRANSLATION_KEY in st.session_state:
    st.download_button(
        "Download Translation",
        data=st.session_state[TRANSLATION_KEY],
        file_name="translation.txt",
    )