File size: 2,293 Bytes
abea1d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import streamlit as st
from transformers import MBartForConditionalGeneration, MBart50Tokenizer

# Download the model and tokenizer
model_path = "echarif/mBART_for_darija_transaltion"  # make sure this is the correct model path


@st.cache_resource(show_spinner=False)
def _load_translation_model(path):
    """Load the mBART model and its tokenizer once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so an un-cached ``from_pretrained`` call would run again on
    each rerun. ``st.cache_resource`` keeps the loaded objects and hands the
    same instances back on later reruns.

    ``show_spinner=False`` keeps this call from rendering any UI element,
    because ``st.set_page_config`` is only invoked further down the script.

    NOTE(review): cached resources are shared between sessions; the
    translation step mutates ``tokenizer.src_lang`` right before use, so
    confirm that is acceptable if concurrent users pick different source
    languages.

    Args:
        path: Model identifier or directory passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = MBartForConditionalGeneration.from_pretrained(path)
    loaded_tokenizer = MBart50Tokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_translation_model(model_path)

# Interface setup
# Browser-tab title/icon and wide layout. The emoji literals in this section
# had been mis-encoded (UTF-8 bytes decoded with a legacy code page, last byte
# lost) and are restored here.
# NOTE(review): the globe variant is assumed to be U+1F30D - confirm.
st.set_page_config(page_title="Darija to English Translator", page_icon="🌍", layout="wide")
# st.image("logo.png", width=200)  # logo display is currently disabled

st.title("Darija to English Translation App 🌍")
# Short introduction rendered as Markdown below the title.
st.markdown(
    """
    ### Welcome to the Darija to English Translation App! 🌍
    This app uses **mBART** for translating Moroccan Darija to English with advanced NLP capabilities.
    """
)

# Parameters setting sidebar
# All generation settings live in the sidebar; the values chosen here are read
# by the translation step further down the script.
sidebar = st.sidebar
sidebar.header("Model Parameters")

# Beam search width: range 1-10, default 5.
beam_size = sidebar.slider("Beam Size", min_value=1, max_value=10, value=5)
# Upper bound passed to generation as max_length: range 10-200, default 50.
max_length = sidebar.slider("Max Length", min_value=10, max_value=200, value=50)

# Language codes for the tokenizer's source language and the forced target
# language; the first entry of each list is preselected.
src_lang = sidebar.selectbox("Source Language", options=["ar_AR", "fr_XX"], index=0)
tgt_lang = sidebar.selectbox("Target Language", options=["en_XX", "es_XX"], index=0)

# Enter and translate text
# The latest translation is kept in st.session_state rather than in a plain
# script variable. Streamlit reruns the script on every interaction (including
# a click on the download button), and a plain variable would be gone on the
# rerun, taking the displayed result and the download button with it.
col1, col2 = st.columns(2)

with col1:
    st.header("Input Text")
    input_text = st.text_area("Darija Text:", "")

with col2:
    st.header("Translation")
    if st.button("Translate"):
        # A new request invalidates the previous result, so a failed or empty
        # request never leaves a stale translation on screen.
        if "translation" in st.session_state:
            del st.session_state["translation"]

        if input_text.strip():
            try:
                # Preparing text for translation
                tokenizer.src_lang = src_lang
                inputs = tokenizer(input_text, return_tensors="pt", padding=True)
                # forced_bos_token_id makes generation start with the target
                # language code selected in the sidebar.
                outputs = model.generate(
                    **inputs, max_length=max_length, num_beams=beam_size,
                    forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang]
                )
                st.session_state["translation"] = tokenizer.decode(
                    outputs[0], skip_special_tokens=True
                )
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing the page.
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter text to translate!")

    # Show translation (also on reruns that were not triggered by "Translate")
    translation = st.session_state.get("translation")
    if translation is not None:
        st.success("Translation:")
        st.write(translation)

# Button to download translation
if translation is not None:
    st.download_button("Download Translation", data=translation, file_name="translation.txt")