File size: 2,301 Bytes
423e2c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import cv2
import gradio as gr

import boot
from ocr import detection, recognition


# Warm up at import time so the first user request does not pay the
# initialization cost (presumably loads model weights — see boot module).
boot.warmup_model()


def extract_text_from_image(image, progress=gr.Progress()):
    """
    Run the two-stage OCR pipeline on an uploaded image.

    Called by Gradio when the "Extract Text" button is clicked.

    Args:
        image: Input image as a numpy array (from gr.Image(type="numpy")),
            or None when nothing was uploaded.
        progress: Gradio progress tracker. Declared as a default parameter
            so Gradio injects a tracker bound to the current event — a
            gr.Progress() constructed manually inside the function body is
            not attached to the running event, so its updates never appear
            in the UI.

    Returns:
        Tuple of (image annotated with detection boxes, recognized text),
        or (None, <hint message>) when no image was provided.
    """
    if image is None:
        return None, "Please upload an image first"

    # Forward OCR progress (fraction done plus a status message) to the UI.
    def ocr_progress(current, total, message):
        progress(current / total, desc=message)

    # Stage 1: locate text regions (Tesseract, lang="mya", page seg mode 6,
    # no confidence filtering).
    detection_result = detection.inference(image, lang="mya", psm=6, min_conf=-1)
    # Stage 2: recognize text in the detected regions.
    recognition_result = recognition.inference(detection_result, progress_callback=ocr_progress)

    # Draw the detection boxes; convert OpenCV's BGR output to RGB for Gradio.
    result_img = detection.draw_boxes(image, detection_result)
    image_with_detection_boxes = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

    return image_with_detection_boxes, recognition_result


# Build the Gradio UI: image input on the left, detection preview and
# recognized text stacked on the right.
with gr.Blocks(title="Myanmar OCR Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Myanmar OCR Demo Application")

    with gr.Row():
        # Left column: upload control plus the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("## Input")
            uploaded_image = gr.Image(label="Upload Image", type="numpy", height=400)
            extract_button = gr.Button("Extract Text", variant="primary", size="lg")

        # Right column: detection visualization above the recognition output.
        with gr.Column(scale=1):
            gr.Markdown("## Output")

            with gr.Group():
                gr.Markdown("### Text Detection (Tesseract)")
                detection_preview = gr.Image(label="Detection Result", height=600, interactive=False)

            with gr.Group():
                gr.Markdown("### Text Recognition (SMOL-VLM)")
                recognized_text = gr.Textbox(
                    label="OCR Results",
                    lines=10,
                    max_lines=15,
                    interactive=False,
                    placeholder="Extracted text will appear here...",
                )

    # Wire the button to the OCR pipeline.
    extract_button.click(
        fn=extract_text_from_image,
        inputs=[uploaded_image],
        outputs=[detection_preview, recognized_text],
    )


if __name__ == "__main__":
    demo.launch()