File size: 3,185 Bytes
aa45f22
 
 
 
2a9bd6e
aa45f22
2a9bd6e
 
 
 
 
aa45f22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad4078b
aa45f22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import streamlit as st
import asyncio
import re
import os
import subprocess
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# EasyOCR is an optional install on some hosts: try the import first and fall
# back to installing it (plus its runtime dependencies) on the fly.
try:
    import easyocr
except ImportError:
    import sys

    # Invoke pip through the running interpreter so the packages land in the
    # same environment this script executes in; a bare "pip" on PATH may
    # belong to a different Python installation.
    subprocess.run(
        [sys.executable, "-m", "pip", "install",
         "easyocr", "torch", "transformers", "sentencepiece"]
    )
    import easyocr  # Try importing again after installing

# Install a fresh asyncio event loop for the current thread.
# NOTE(review): presumably a workaround for libraries that expect an event
# loop to exist in Streamlit's script thread - confirm it is still needed.
asyncio.set_event_loop(asyncio.new_event_loop())


# Streamlit re-executes this script on every user interaction. Without
# caching, the OCR reader and all three transformer pipelines would be rebuilt
# on each rerun; st.cache_resource keeps one shared instance per server
# process instead.
@st.cache_resource
def _load_ocr_reader():
    """Create the EasyOCR reader for Lithuanian ('lt') text."""
    return easyocr.Reader(['lt'])


@st.cache_resource
def _load_pipeline(task, model_name):
    """Create a Hugging Face pipeline for ``task`` backed by ``model_name``."""
    return pipeline(task, model=model_name)


# Load the model that recognises text in images.
reader = _load_ocr_reader()

# Models needed for translation.
translator_model = "Helsinki-NLP/opus-mt-tc-big-lt-en"  # lt to en
translator_model_back = "Helsinki-NLP/opus-mt-tc-big-en-lt"  # en to lt
translator_lt_en = _load_pipeline("translation", translator_model)
translator_en_lt = _load_pipeline("translation", translator_model_back)

# Model used to produce the summary.
summarizer_model_name = "facebook/bart-large-cnn"
try:
    summarizer = _load_pipeline("summarization", summarizer_model_name)
except Exception as e:
    # Without a summarizer the app cannot do its job: report and halt the run.
    st.error(f"Klaida su santraukos modeliu: {e}")
    st.stop()

# Page header and prompt shown above the upload widget.
st.title("Lietuviško teksto iš nuotraukos santraukos sukūrimas naudojant DI (verčiant)")
st.write("Įkelkite nuotrauką su tekstu:")

# Image upload widget; only PNG and JPEG files are accepted.
uploaded_file = st.file_uploader(
    label="Įkelkite nuotrauką...",
    type=["png", "jpg", "jpeg"],
)

def preprocess_text(text):
    """Clean OCR output before it is passed to the translator.

    Removes the hyphenation markers ``"-\\n"`` and ``"- "`` so that words
    split across lines are re-joined, then drops every character that is not
    an ASCII letter, a Lithuanian letter, a digit, whitespace, or one of
    ``. , ; :``.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text.
    """
    # Order matters: handle the newline form first, then the space form.
    for hyphen_break in ("-\n", "- "):
        text = text.replace(hyphen_break, "")

    disallowed_chars = r"[^a-zA-ZąčęėįšųūžĄČĘĖĮŠŲŪŽ0-9\s\.,;:]"
    return re.sub(disallowed_chars, "", text)

# Main flow, executed on each rerun once an image has been uploaded:
# OCR -> clean-up -> LT->EN translation -> summarization -> EN->LT translation.
if uploaded_file:
    st.image(uploaded_file, caption="Įkelta nuotrauka", use_container_width=True)

    with st.spinner("Gaunamas tekstas..."):
        # getvalue() returns the complete upload regardless of the current
        # stream position, so OCR does not depend on where st.image() left
        # the read pointer.
        ocr_fragments = reader.readtext(uploaded_file.getvalue(), detail=0)
        extracted_text = " ".join(ocr_fragments)

    if extracted_text:
        st.subheader("Gautas tekstas:")
        st.write(extracted_text)

        # Preprocess the extracted text
        processed_text = preprocess_text(extracted_text)

        # Clean-up can strip everything (e.g. OCR returned only symbols);
        # there is nothing meaningful to translate in that case.
        if not processed_text.strip():
            st.warning("Nerasta jokio teksto, pabandykite iš naujo.")
            st.stop()

        st.subheader("Sutvarkytas tekstas:")
        st.write(processed_text)

        # Every model call sits in one try block so that a failure in any
        # stage is reported in the page instead of crashing the script.
        try:
            # Translate Lithuanian text -> English.
            # truncation=True clips inputs that exceed the model's maximum
            # length instead of letting the pipeline raise on long texts.
            with st.spinner("Tekstas verčiamas..."):
                translation_output = translator_lt_en(processed_text, truncation=True)
                translated_text = translation_output[0]['translation_text']

            st.subheader("Tekstas, išverstas į anglų kalbą:")
            st.write(translated_text)

            # Generate the summary (deterministic: sampling is disabled).
            with st.spinner("Gaunama santrauka..."):
                summary_output = summarizer(
                    translated_text,
                    max_length=100,
                    min_length=30,
                    do_sample=False,
                    truncation=True,
                )
                summary_english = summary_output[0]['summary_text']

            st.subheader("Santrauka anglų kalba:")
            st.write(summary_english)

            # Translate the summary back to Lithuanian.
            with st.spinner("Verčiama santrauka..."):
                summary_lithuanian = translator_en_lt(summary_english)[0]['translation_text']

            st.subheader("Santrauka Lietuviškai:")
            st.write(summary_lithuanian)

        except Exception as e:
            st.error(f"Klaida gaunant santrauką: {e}")
    else:
        st.warning("Nerasta jokio teksto, pabandykite iš naujo.")