import streamlit as st
from PIL import Image
import fitz  # PyMuPDF
import numpy as np
import tempfile
import os
import time
import io
import json
import torch
import cv2

import ocr_engines

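# LLM post-processing is optional: llm_processor is a local helper module
# (backed by Ollama, per the warnings below); the app falls back to OCR-only
# mode when it cannot be imported.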
llm_available = False
try:
    import llm_processor

    llm_available = True
except ImportError:
    pass

if not os.path.exists("results"):
    os.makedirs("results")

st.title("OCRInsight")

st.sidebar.header("Settings")

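# Append one run's metadata block and OCR text to a plain-text results file.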
def save_text_to_file(attributes_of_output, all_ocr_text, filename):
    with open(filename, "a", encoding="utf-8") as f:
        f.write("\n" + "-" * 75 + "\n")
        f.write("Attributes of Output:\n")
        f.write(attributes_of_output)
        f.write("\nOCR Result:\n")
        f.write(all_ocr_text)
        f.write("\n" + "-" * 75 + "\n")
    st.success(f"{filename} saved successfully!")

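# Sidebar controls: compute device, output saving, document language, OCR
# engines to run, and an optional LLM for post-processing.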
device = st.sidebar.radio("Select Device", ["CPU", "GPU (CUDA)"])
save_output = st.sidebar.checkbox("Save Outputs")

language = st.sidebar.selectbox(
    "Select Language", ["Türkçe", "English", "Français", "Deutsch", "Español"]
)

language_codes = {
    "Türkçe": "tr",
    "English": "en",
    "Français": "fr",
    "Deutsch": "de",
    "Español": "es",
}

ocr_models = st.sidebar.multiselect(
    "Select OCR Models",
    ["EasyOCR", "DocTR", "Tesseract", "PaddleOCR"],
    ["EasyOCR"],
)

llm_model = st.sidebar.selectbox(
    "Select LLM Model", ["Only OCR Mode", "llama3.1", "llama3", "gemma2"]
)

if llm_model != "Only OCR Mode" and llm_available:
    user_command = st.sidebar.text_input("Enter command:", "")
    task_type = st.sidebar.radio("Select task type:", ["Summarize", "Generate"])
elif llm_model != "Only OCR Mode" and not llm_available:
    st.sidebar.warning(
        "LLM features are not available. Please install 'ollama' to enable LLM processing."
    )
    llm_model = "Only OCR Mode"

if device == "GPU (CUDA)" and not torch.cuda.is_available():
    st.sidebar.warning("GPU (CUDA) not available. Switching to CPU.")
    device = "CPU"

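# Initialise the selected OCR engines once up front; initialize_ocr_models is
# presumed to return the ready-to-use readers keyed by model name, which
# perform_ocr looks up per call below.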
ocr_readers = ocr_engines.initialize_ocr_models(
    ocr_models, language_codes[language], device
)

uploaded_file = st.file_uploader(
    "Upload File (PDF, Image)", type=["pdf", "png", "jpg", "jpeg"]
)

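# Main pipeline: convert the upload to PIL images (one per PDF page), run every
# selected OCR engine on each page, then optionally save the results and hand
# the combined text to the chosen LLM.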
if uploaded_file is not None:
    start_time = time.time()

    if uploaded_file.type == "application/pdf":
        # Render each PDF page to a PNG with PyMuPDF and wrap it as a PIL image.
        pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf")
        images = []
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()
            img_data = pix.tobytes("png")
            img = Image.open(io.BytesIO(img_data))
            images.append(img)
        total_pages = len(pdf_document)
        pdf_document.close()
    else:
        images = [Image.open(uploaded_file)]
        total_pages = 1

    all_ocr_texts = {model_name: "" for model_name in ocr_models}

    for page_num, image in enumerate(images, start=1):
        st.image(image, caption=f"Page {page_num}/{total_pages}", use_column_width=True)

        for model_name in ocr_models:
            text = ocr_engines.perform_ocr(
                model_name, ocr_readers, image, language_codes[language]
            )
            all_ocr_texts[model_name] += f"--- Page {page_num} ({model_name}) ---\n{text}\n\n"

            st.subheader(f"OCR Result ({model_name}) - Page {page_num}/{total_pages}:")
            st.text(text)

    end_time = time.time()
    process_time = end_time - start_time

    st.info(f"Processing time: {process_time:.2f} seconds")

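    # Persist each engine's full OCR text together with run metadata
    # (models, language, device, processing time) under results/.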
    if save_output:
        attributes_of_output = {
            "Model Names": ocr_models,
            "Language": language,
            "Device": device,
            "Process Time": process_time,
        }
        for model_name, ocr_text in all_ocr_texts.items():
            filename = os.path.join("results", f"ocr_output_{model_name}.txt")
            save_text_to_file(
                json.dumps(attributes_of_output, ensure_ascii=False), ocr_text, filename
            )

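    # Optional LLM step: build a prompt from the combined OCR text and the
    # user's command, then delegate to llm_processor.process_with_llm.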
    if (
        llm_model != "Only OCR Mode"
        and llm_available
        and st.sidebar.button("Start LLM Processing")
    ):
        st.subheader("LLM Processing Result:")

        combined_ocr_text = "\n".join(all_ocr_texts.values())

        if task_type == "Summarize":
            prompt = f"Please summarize the following text. Command: {user_command}\n\nText: {combined_ocr_text}"
        else:
            prompt = f"Please generate new text based on the following text. Command: {user_command}\n\nText: {combined_ocr_text}"

        llm_output = llm_processor.process_with_llm(llm_model, prompt)

        st.write(f"Processing completed using '{llm_model}' model.")
        st.text_area("LLM Output:", value=llm_output, height=300)

        if save_output:
            filename = "llm_output.txt"
            save_text_to_file(llm_output, "", filename)

    elif llm_model != "Only OCR Mode" and not llm_available:
        st.warning(
            "LLM features are not available. Please install 'ollama' to enable LLM processing."
        )

st.sidebar.info(f"Selected device: {device}")