# chuuhtetnaing's picture
# update dockerfile for containerization on hf
# 67dfedf
import cv2
import gradio as gr
# Project-local modules: boot (model setup) and the OCR pipeline stages.
import boot
from ocr import detection, recognition
# Warm the model at import time so the first user request isn't slow.
# NOTE(review): presumably loads/initializes the OCR model weights — confirm in boot.py.
boot.warmup_model()
def extract_text_from_image(image, progress=gr.Progress()):
    """
    Run the full OCR pipeline on an uploaded image.

    Called by Gradio when the "Extract Text" button is clicked.

    Parameters
    ----------
    image : numpy.ndarray | None
        Image from the ``gr.Image`` input (``type="numpy"``), or ``None``
        when nothing has been uploaded.
    progress : gr.Progress
        Injected by Gradio. Declaring ``gr.Progress()`` as a default
        parameter (instead of instantiating it inside the body, as the
        original code did) is what lets Gradio associate it with this
        event and actually render the progress bar in the UI.

    Returns
    -------
    tuple
        ``(annotated_image, recognized_text)`` on success, or
        ``(None, help_message)`` when no image was provided.
    """
    if image is None:
        return None, "Please upload an image first"

    def ocr_progress(current, total, message):
        # Forward per-step OCR progress to the Gradio progress bar.
        progress(current / total, desc=message)

    # Stage 1: locate text regions (Tesseract, Myanmar language model).
    detection_result = detection.inference(image, lang="mya", psm=6, min_conf=-1)
    # Stage 2: recognize text in the detected regions.
    recognition_result = recognition.inference(detection_result, progress_callback=ocr_progress)
    # Overlay the detection boxes, then convert BGR -> RGB for display.
    # NOTE(review): the conversion implies draw_boxes returns a BGR image
    # (OpenCV convention) — confirm against detection.draw_boxes.
    result_img = detection.draw_boxes(image, detection_result)
    image_with_detection_boxes = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)
    return image_with_detection_boxes, recognition_result
# Assemble the Gradio UI: input controls on the left, results on the right.
with gr.Blocks(title="Myanmar OCR Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Myanmar OCR Demo Application")
    with gr.Row():
        with gr.Column(scale=1):
            # Input side: image uploader plus the trigger button.
            gr.Markdown("## Input")
            uploaded_image = gr.Image(label="Upload Image", type="numpy", height=400)
            run_button = gr.Button("Extract Text", variant="primary", size="lg")
        with gr.Column(scale=1):
            # Output side: annotated detection image above the recognized text.
            gr.Markdown("## Output")
            with gr.Group():
                gr.Markdown("### Text Detection (Tesseract)")
                detection_view = gr.Image(label="Detection Result", height=600, interactive=False)
            with gr.Group():
                gr.Markdown("### Text Recognition (SMOL-VLM)")
                recognized_text = gr.Textbox(
                    label="OCR Results",
                    lines=10,
                    max_lines=15,
                    interactive=False,
                    placeholder="Extracted text will appear here...",
                )
    # Wire the button to the OCR pipeline.
    run_button.click(
        fn=extract_text_from_image,
        inputs=[uploaded_image],
        outputs=[detection_view, recognized_text],
    )
if __name__ == "__main__":
    # Start the Gradio server when executed as a script (default host/port).
    demo.launch()