Vr2123156 committed on
Commit
aa45f22
·
verified ·
1 Parent(s): 015297e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import easyocr
3
+ import asyncio
4
+ import re
5
+ import os
6
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
7
+
8
# NOTE(review): presumably a workaround for "no current event loop" errors
# raised when a library looks up an asyncio loop from Streamlit's script
# thread — confirm it is still required.
asyncio.set_event_loop(asyncio.new_event_loop())

# Model identifiers.
translator_model = "Helsinki-NLP/opus-mt-tc-big-lt-en"  # lt to en
translator_model_back = "Helsinki-NLP/opus-mt-tc-big-en-lt"  # en to lt
summarizer_model_name = "facebook/bart-large-cnn"


# Streamlit re-executes this whole script on every interaction. Without
# caching, each rerun would rebuild the OCR reader and all three transformer
# pipelines. st.cache_resource keeps one shared instance per server process.
@st.cache_resource(show_spinner="Įkeliami modeliai...")
def _load_ocr_reader():
    """Create the EasyOCR reader for Lithuanian text (cached across reruns)."""
    return easyocr.Reader(['lt'])


@st.cache_resource(show_spinner="Įkeliami modeliai...")
def _load_pipeline(task, model_name):
    """Create a transformers pipeline for ``task`` backed by ``model_name``.

    The result is cached per (task, model_name) pair, so repeated reruns reuse
    the already-loaded model.
    """
    return pipeline(task, model=model_name)


# Load the model that recognises text in images.
reader = _load_ocr_reader()

# Load the models needed for translation.
translator_lt_en = _load_pipeline("translation", translator_model)
translator_en_lt = _load_pipeline("translation", translator_model_back)

# Load the model used to create the summary; without it the app cannot do
# anything useful, so report the problem and halt the script.
try:
    summarizer = _load_pipeline("summarization", summarizer_model_name)
except Exception as e:
    st.error(f"Klaida su santraukos modeliu: {e}")
    st.stop()

# Streamlit UI setup
st.title("Lietuviško teksto iš nuotraukos santraukos sukūrimas naudojant DI")
st.write("Įkelkite nuotrauką su tekstu:")

# Upload image file
uploaded_file = st.file_uploader("Įkelkite nuotrauką...", type=["png", "jpg", "jpeg"])
35
+
36
# A hyphen left behind by a line wrap inside a word ("kompiu- teris"):
# letter, "-", whitespace, letter.
_WRAPPED_HYPHEN = re.compile(r"(?<=[^\W\d_])-\s+(?=[^\W\d_])")
# Any remaining hyphen or dash; these separate tokens ("2020-2021").
_DASHES = re.compile(r"[-–—]")
# Everything except Latin/Lithuanian letters, digits, whitespace and basic
# sentence punctuation.
_DISALLOWED_CHARS = re.compile(r"[^a-zA-ZąčęėįšųūžĄČĘĖĮŠŲŪŽ0-9\s\.,;:!?]")
_WHITESPACE_RUN = re.compile(r"\s+")


def preprocess_text(text):
    """Clean OCR output before it is translated.

    Steps, in order:
      1. Re-join words that were hyphenated across a line break.
      2. Turn any other hyphen/dash into a space so the tokens on either side
         do not get glued together (e.g. "2020-2021" -> "2020 2021").
      3. Drop every character that is not a letter, digit, whitespace or one
         of ``. , ; : ! ?``. Sentence-ending ``?`` and ``!`` are kept because
         the text is passed on to translation and summarisation.
      4. Collapse runs of whitespace and trim the ends.

    Args:
        text: Raw text as returned by OCR. ``None`` or an empty string is
            accepted.

    Returns:
        The cleaned text; an empty string when there is nothing left.
    """
    if not text:
        return ""

    text = _WRAPPED_HYPHEN.sub("", text)
    text = _DASHES.sub(" ", text)
    text = _DISALLOWED_CHARS.sub("", text)
    return _WHITESPACE_RUN.sub(" ", text).strip()
40
+
41
def _summarise_image(image_file):
    """Show the uploaded image and walk it through the full pipeline.

    Pipeline: OCR -> text clean-up -> Lithuanian-to-English translation ->
    English summary -> English-to-Lithuanian translation. Every intermediate
    result is rendered on the page. A failure in any step is reported with
    ``st.error`` and ends the run early instead of surfacing a raw traceback.

    Args:
        image_file: The object returned by ``st.file_uploader``.
    """
    st.image(image_file, caption="Įkelta nuotrauka", use_container_width=True)

    try:
        with st.spinner("Gaunamas tekstas..."):
            # getvalue() returns the complete upload regardless of the current
            # stream position, unlike read().
            fragments = reader.readtext(image_file.getvalue(), detail=0)
    except Exception as e:
        st.error(f"Klaida atpažįstant tekstą: {e}")
        return

    extracted_text = " ".join(fragments)
    if not extracted_text.strip():
        st.warning("Nerasta jokio teksto, pabandykite iš naujo.")
        return

    st.subheader("Gautas tekstas:")
    st.write(extracted_text)

    # Preprocess the extracted text
    processed_text = preprocess_text(extracted_text)
    if not processed_text.strip():
        # Nothing usable survived the clean-up; do not feed the translator
        # an empty string.
        st.warning("Nerasta jokio teksto, pabandykite iš naujo.")
        return

    st.subheader("Sutvarkytas tekstas:")
    st.write(processed_text)

    # Translate Lithuanian text -> English.
    # truncation=True clips input that exceeds the model's maximum length
    # instead of letting the model fail on it.
    # NOTE(review): long texts are therefore only partially translated;
    # consider sentence-level chunking if full coverage matters.
    try:
        with st.spinner("Tekstas verčiamas..."):
            translated_text = translator_lt_en(
                processed_text, truncation=True
            )[0]['translation_text']
    except Exception as e:
        st.error(f"Klaida verčiant tekstą: {e}")
        return

    st.subheader("Tekstas, išverstas į anglų kalbą:")
    st.write(translated_text)

    # Generate the summary and translate it back to Lithuanian
    try:
        with st.spinner("Gaunama santrauka..."):
            summary_output = summarizer(
                translated_text,
                max_length=100,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summary_english = summary_output[0]['summary_text']

        st.subheader("Santrauka anglų kalba:")
        st.write(summary_english)

        with st.spinner("Verčiama santrauka..."):
            summary_lithuanian = translator_en_lt(
                summary_english, truncation=True
            )[0]['translation_text']

        st.subheader("Santrauka Lietuviškai:")
        st.write(summary_lithuanian)
    except Exception as e:
        st.error(f"Klaida gaunant santrauką: {e}")


if uploaded_file:
    _summarise_image(uploaded_file)