"""OCR engine initializers and runners with safer Tesseract handling.""" import os import sys import tempfile import numpy as np try: import easyocr except Exception: easyocr = None try: from doctr.io import DocumentFile from doctr.models import ocr_predictor except Exception: DocumentFile = None ocr_predictor = None try: from paddleocr import PaddleOCR except Exception: PaddleOCR = None try: import pytesseract except Exception: pytesseract = None try: import cv2 except Exception: cv2 = None def initialize_ocr_models(ocr_models, language_code, device): ocr_readers = {} if "EasyOCR" in ocr_models and easyocr is not None: ocr_readers["EasyOCR"] = easyocr.Reader( [language_code], gpu=(device == "GPU (CUDA)") ) if "DocTR" in ocr_models and ocr_predictor is not None: ocr_readers["DocTR"] = ocr_predictor(pretrained=True) if "PaddleOCR" in ocr_models and PaddleOCR is not None: use_gpu = True if device == "GPU (CUDA)" else False ocr_readers["PaddleOCR"] = PaddleOCR(lang=language_code, use_gpu=use_gpu) # Tesseract: only set executable path for known Windows locations; on Unix, assume tesseract is on PATH if "Tesseract" in ocr_models and pytesseract is not None: if sys.platform.startswith("win"): # common Windows installation path pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" else: # check common unix paths and set if tesseract binary exists there for p in ("/usr/bin/tesseract", "/usr/local/bin/tesseract"): if os.path.exists(p): pytesseract.pytesseract.tesseract_cmd = p break return ocr_readers def perform_ocr(model_name, ocr_readers, image, language_code): text = "" if model_name == "EasyOCR": reader = ocr_readers.get("EasyOCR") if reader is None: return "[EasyOCR not available]" result = reader.readtext(np.array(image)) text = "\n".join([res[1] for res in result]) elif model_name == "DocTR": predictor = ocr_readers.get("DocTR") if predictor is None or DocumentFile is None: return "[DocTR not available]" with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file: image.save(tmp_file, format="PNG") file_path = tmp_file.name doc = DocumentFile.from_images(file_path) result = predictor(doc) # Safely iterate pages/blocks pages = [] for page in result.pages: page_text_blocks = [] for block in page.blocks: lines = [" ".join([word.value for word in line.words]) for line in block.lines] page_text_blocks.append("\n".join(lines)) pages.append("\n\n".join(page_text_blocks)) text = "\n\n".join(pages) try: os.unlink(file_path) except Exception: pass elif model_name == "PaddleOCR": reader = ocr_readers.get("PaddleOCR") if reader is None: return "[PaddleOCR not available]" result = reader.ocr(np.array(image)) # result may be empty or structured per line try: text = "\n".join([line[1][0] for line in result[0]]) except Exception: # fallback: join any text tokens found tokens = [] for page in result: for line in page: if len(line) > 1 and isinstance(line[1], (list, tuple)): tokens.append(line[1][0]) text = "\n".join(tokens) elif model_name == "Tesseract": if pytesseract is None: return "[pytesseract not available]" # Convert PIL image to RGB if not already try: if image.mode != "RGB": image = image.convert("RGB") except Exception: pass # Convert image to OpenCV format if cv2 is available if cv2 is not None: opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) else: # fallback: use raw numpy array opencv_image = np.array(image) config = f"--oem 3 --psm 6 -l {language_code}" try: text = pytesseract.image_to_string(opencv_image) # , config=config except Exception as e: text = f"[Tesseract error: {e}]" return text