Spaces:
Sleeping
Sleeping
File size: 3,185 Bytes
aa45f22 2a9bd6e aa45f22 2a9bd6e aa45f22 ad4078b aa45f22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import streamlit as st
import asyncio
import re
import os
import subprocess
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
try:
    import easyocr
except ImportError:
    import importlib
    import sys

    # Install with the interpreter that is running this app: a bare "pip" found
    # on PATH may belong to a different Python environment, in which case the
    # retry import below would still fail.
    # check=True surfaces a failed install as CalledProcessError right here
    # instead of falling through to a less informative second ImportError.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install",
            "easyocr", "torch", "transformers", "sentencepiece",
        ],
        check=True,
    )
    # Packages installed after interpreter start-up may be invisible to the
    # import system until its finder caches are refreshed.
    importlib.invalidate_caches()
    import easyocr  # retry now that the package has been installed
# Install a fresh event loop as the current one for this thread.
# NOTE(review): presumably a workaround for event-loop errors seen when
# torch-backed libraries run inside Streamlit's script thread — confirm.
asyncio.set_event_loop(asyncio.new_event_loop())


@st.cache_resource(show_spinner=False)
def _load_ocr_reader():
    """Build the EasyOCR reader for Lithuanian text.

    Streamlit re-executes the whole script on every user interaction; caching
    the reader as a resource keeps the model from being rebuilt on each rerun.
    """
    return easyocr.Reader(['lt'])


# Load the model that recognizes text in images.
reader = _load_ocr_reader()
# Models needed for translation.
translator_model = "Helsinki-NLP/opus-mt-tc-big-lt-en"  # lt to en
translator_model_back = "Helsinki-NLP/opus-mt-tc-big-en-lt"  # en to lt


@st.cache_resource(show_spinner=False)
def _load_translator(model_name):
    """Create a translation pipeline for ``model_name``.

    Streamlit re-executes the whole script on every user interaction; caching
    the pipeline as a resource (keyed by the model name argument) keeps each
    translation model from being reloaded on each rerun.
    """
    return pipeline("translation", model=model_name)


translator_lt_en = _load_translator(translator_model)
translator_en_lt = _load_translator(translator_model_back)
# Model used to produce the summary.
summarizer_model_name = "facebook/bart-large-cnn"


@st.cache_resource(show_spinner=False)
def _load_summarizer(model_name):
    """Create the summarization pipeline for ``model_name``.

    Streamlit re-executes the whole script on every user interaction; caching
    the pipeline as a resource keeps the model from being reloaded on each
    rerun.
    """
    return pipeline("summarization", model=model_name)


try:
    summarizer = _load_summarizer(summarizer_model_name)
except Exception as e:
    # Without a summarizer the app cannot do anything useful: report the
    # problem on the page and halt this script run.
    st.error(f"Klaida su santraukos modeliu: {e}")
    st.stop()
# Page header: title plus a short instruction line for the user.
st.title("Lietuviško teksto iš nuotraukos santraukos sukūrimas naudojant DI (verčiant)")
st.write("Įkelkite nuotrauką su tekstu:")

# Image upload widget; the uploader is restricted to PNG and JPEG files.
uploaded_file = st.file_uploader(
    label="Įkelkite nuotrauką...",
    type=["png", "jpg", "jpeg"],
)
def preprocess_text(text):
    """Clean OCR output before it is passed on to translation.

    Two passes are applied in order:

    1. Every hyphen that is immediately followed by a newline or by a space is
       removed together with that following character, re-joining words that
       were split across lines.
    2. Every character that is not an ASCII letter, one of the Lithuanian
       letters listed in the pattern, a digit, whitespace, or one of
       ``. , ; :`` is dropped.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text.
    """
    for hyphen_break in ("-\n", "- "):
        text = text.replace(hyphen_break, "")

    disallowed = r"[^a-zA-ZąčęėįšųūžĄČĘĖĮŠŲŪŽ0-9\s\.,;:]"
    return re.sub(disallowed, "", text)
def _summarize_uploaded_image(image_file):
    """Run the OCR -> clean -> translate -> summarize -> translate-back flow.

    Each stage writes its result to the Streamlit page. The function returns
    early with a warning when there is no usable text, and with an error
    message when a translation or summarization call fails.

    Args:
        image_file: The uploaded image object returned by ``st.file_uploader``.
    """
    st.image(image_file, caption="Įkelta nuotrauka", use_container_width=True)

    with st.spinner("Gaunamas tekstas..."):
        # getvalue() returns the complete upload regardless of where the
        # stream position currently is, unlike read().
        ocr_fragments = reader.readtext(image_file.getvalue(), detail=0)
    extracted_text = " ".join(ocr_fragments)

    # Whitespace-only OCR output is treated the same as no text at all.
    if not extracted_text.strip():
        st.warning("Nerasta jokio teksto, pabandykite iš naujo.")
        return

    st.subheader("Gautas tekstas:")
    st.write(extracted_text)

    # Clean the extracted text before translation.
    processed_text = preprocess_text(extracted_text)
    st.subheader("Sutvarkytas tekstas:")
    st.write(processed_text)

    # Cleaning can remove everything (e.g. the OCR result held only symbols);
    # there is nothing to translate in that case.
    if not processed_text.strip():
        st.warning("Nerasta jokio teksto, pabandykite iš naujo.")
        return

    # Lithuanian -> English.
    with st.spinner("Tekstas verčiamas..."):
        try:
            translated_text = translator_lt_en(processed_text)[0]['translation_text']
        except Exception as exc:
            # Page-level boundary: show the failure instead of a raw traceback.
            st.error(f"Klaida verčiant tekstą: {exc}")
            return
    st.subheader("Tekstas, išverstas į anglų kalbą:")
    st.write(translated_text)

    # Summarize the English text.
    with st.spinner("Gaunama santrauka..."):
        try:
            summary_output = summarizer(
                translated_text,
                max_length=100,
                min_length=30,
                do_sample=False,
                # Clip inputs longer than the model's limit instead of failing.
                truncation=True,
            )
            summary_english = summary_output[0]['summary_text']
        except Exception as exc:
            st.error(f"Klaida gaunant santrauką: {exc}")
            return
    st.subheader("Santrauka anglų kalba:")
    st.write(summary_english)

    # English summary -> Lithuanian.
    with st.spinner("Verčiama santrauka..."):
        try:
            summary_lithuanian = translator_en_lt(summary_english)[0]['translation_text']
        except Exception as exc:
            st.error(f"Klaida gaunant santrauką: {exc}")
            return
    st.subheader("Santrauka Lietuviškai:")
    st.write(summary_lithuanian)


if uploaded_file:
    _summarize_uploaded_image(uploaded_file)
|