File size: 4,538 Bytes
0f922c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""OCR engine initializers and runners with safer Tesseract handling."""

import os
import sys
import tempfile
import numpy as np

try:
    import easyocr
except Exception:
    easyocr = None

try:
    from doctr.io import DocumentFile
    from doctr.models import ocr_predictor
except Exception:
    DocumentFile = None
    ocr_predictor = None

try:
    from paddleocr import PaddleOCR
except Exception:
    PaddleOCR = None

try:
    import pytesseract
except Exception:
    pytesseract = None

try:
    import cv2
except Exception:
    cv2 = None


def initialize_ocr_models(ocr_models, language_code, device):
    """Create reader objects for the requested OCR engines.

    Engines whose Python package failed to import (the module-level name is
    ``None``) are silently skipped, so the returned dict may contain fewer
    entries than were requested.

    Args:
        ocr_models: Container of engine names; the recognised names are
            ``"EasyOCR"``, ``"DocTR"``, ``"PaddleOCR"`` and ``"Tesseract"``.
        language_code: Language code handed to EasyOCR and PaddleOCR.
        device: Device label; GPU is enabled only when it equals
            ``"GPU (CUDA)"``.

    Returns:
        dict mapping engine name to its initialised reader. Tesseract never
        gets an entry: it is only configured through
        ``pytesseract.pytesseract.tesseract_cmd``.
    """
    use_gpu = device == "GPU (CUDA)"
    ocr_readers = {}

    if "EasyOCR" in ocr_models and easyocr is not None:
        ocr_readers["EasyOCR"] = easyocr.Reader([language_code], gpu=use_gpu)

    if "DocTR" in ocr_models and ocr_predictor is not None:
        ocr_readers["DocTR"] = ocr_predictor(pretrained=True)

    if "PaddleOCR" in ocr_models and PaddleOCR is not None:
        ocr_readers["PaddleOCR"] = PaddleOCR(lang=language_code, use_gpu=use_gpu)

    if "Tesseract" in ocr_models and pytesseract is not None:
        if sys.platform.startswith("win"):
            # Common Windows installation locations.
            candidates = (
                r"C:\Program Files\Tesseract-OCR\tesseract.exe",
                r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
            )
        else:
            # Common Unix installation locations.
            candidates = ("/usr/bin/tesseract", "/usr/local/bin/tesseract")

        # Only override pytesseract's command when the binary is really
        # there; otherwise keep its current setting so a tesseract that is
        # reachable through PATH keeps working.
        for candidate in candidates:
            if os.path.exists(candidate):
                pytesseract.pytesseract.tesseract_cmd = candidate
                break

    return ocr_readers


def _easyocr_text(reader, image):
    """Run an EasyOCR reader on *image* and join the detected strings."""
    detections = reader.readtext(np.array(image))
    # Each detection carries its recognised text at index 1.
    return "\n".join(detection[1] for detection in detections)


def _doctr_text(predictor, image):
    """Run a DocTR predictor on *image* via a temporary PNG file."""
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    file_path = tmp_file.name
    try:
        # Close the handle before DocTR reads the file and before unlinking.
        with tmp_file:
            image.save(tmp_file, format="PNG")
        result = predictor(DocumentFile.from_images(file_path))

        pages = []
        for page in result.pages:
            block_texts = []
            for block in page.blocks:
                lines = [
                    " ".join(word.value for word in line.words)
                    for line in block.lines
                ]
                block_texts.append("\n".join(lines))
            pages.append("\n\n".join(block_texts))
        return "\n\n".join(pages)
    finally:
        # Always remove the temp file, even when saving or OCR fails.
        try:
            os.unlink(file_path)
        except OSError:
            pass


def _paddleocr_text(reader, image):
    """Run a PaddleOCR reader on *image* and join the recognised lines."""
    result = reader.ocr(np.array(image))
    # The engine may hand back None or an empty list when nothing is found.
    if not result:
        return ""
    try:
        # Usual shape: result[0] is a list of [box, (text, confidence)].
        return "\n".join(line[1][0] for line in result[0])
    except (TypeError, IndexError, KeyError):
        # Fallback for unexpected shapes (e.g. a ``None`` page): collect
        # whatever text tokens can be found and skip everything else.
        tokens = []
        for page in result:
            for line in page or ():
                if not isinstance(line, (list, tuple)) or len(line) < 2:
                    continue
                payload = line[1]
                if (
                    isinstance(payload, (list, tuple))
                    and payload
                    and isinstance(payload[0], str)
                ):
                    tokens.append(payload[0])
        return "\n".join(tokens)


def _tesseract_text(image):
    """Run Tesseract (through pytesseract) on *image*."""
    # Best-effort normalisation to RGB; objects without a PIL-style
    # ``mode``/``convert`` are passed through unchanged.
    try:
        if image.mode != "RGB":
            image = image.convert("RGB")
    except Exception:
        pass
    # pytesseract treats numpy arrays as RGB, so the array is passed as-is
    # rather than being converted to OpenCV's BGR channel order.
    rgb_array = np.array(image)
    try:
        return pytesseract.image_to_string(rgb_array)
    except Exception as e:
        return f"[Tesseract error: {e}]"


def perform_ocr(model_name, ocr_readers, image, language_code):
    """Extract text from *image* with the OCR engine named *model_name*.

    Args:
        model_name: One of ``"EasyOCR"``, ``"DocTR"``, ``"PaddleOCR"`` or
            ``"Tesseract"``.
        ocr_readers: dict mapping engine name to an initialised reader.
            Tesseract needs no entry.
        image: Image to read. It is converted with ``np.array`` for
            EasyOCR/PaddleOCR/Tesseract and must provide ``save`` for DocTR
            (presumably a PIL image - TODO confirm against callers).
        language_code: Accepted for interface compatibility; not used here.
            Tesseract runs with its default configuration (presumably
            because its language codes differ from the other engines' -
            TODO confirm).

    Returns:
        The recognised text. A bracketed placeholder such as
        ``"[EasyOCR not available]"`` is returned when the engine is
        missing, ``"[Tesseract error: ...]"`` when Tesseract fails, and an
        empty string for an unknown *model_name*.
    """
    if model_name == "EasyOCR":
        reader = ocr_readers.get("EasyOCR")
        if reader is None:
            return "[EasyOCR not available]"
        return _easyocr_text(reader, image)

    if model_name == "DocTR":
        predictor = ocr_readers.get("DocTR")
        if predictor is None or DocumentFile is None:
            return "[DocTR not available]"
        return _doctr_text(predictor, image)

    if model_name == "PaddleOCR":
        reader = ocr_readers.get("PaddleOCR")
        if reader is None:
            return "[PaddleOCR not available]"
        return _paddleocr_text(reader, image)

    if model_name == "Tesseract":
        if pytesseract is None:
            return "[pytesseract not available]"
        return _tesseract_text(image)

    return ""