Spaces:

Armaggheddon
/

yolo11-document-layout

Sleeping

App Files Files Community

yolo11-document-layout / app.py

Armaggheddon

Initial commit

81fc526 2 months ago

raw

history blame contribute delete

3.94 kB

	from pathlib import Path
	import gradio as gr
	import pymupdf
	from ultralytics import YOLO
	from PIL import Image
	from huggingface_hub import hf_hub_download

	# Folder with the bundled example images, located next to this script.
	SAMPLES = Path(__file__).parent / "samples"

	# image1.png .. image4.png, in order; offered as clickable examples in the UI.
	IMAGE_SAMPLES = [SAMPLES / f"image{index}.png" for index in range(1, 5)]

	# Model size -> (Hugging Face repo id, checkpoint filename inside that repo).
	AVAILABLE_MODELS = {
	    size: ("Armaggheddon/yolo11-document-layout", f"{size}_doc_layout.pt")
	    for size in ("yolo11n", "yolo11s", "yolo11m")
	}

	# Module-level model state: the selected size name and the loaded model
	# object (None until a model has been loaded).
	current_model = "yolo11n"
	model = None

	def load_model(selected_model):
	    """Make the global ``model`` hold the checkpoint named by ``selected_model``.

	    Does nothing when a model is already loaded and it is the requested one.
	    Otherwise the checkpoint file is fetched with ``hf_hub_download`` and
	    wrapped in a ``YOLO`` instance.

	    Args:
	        selected_model: A key of ``AVAILABLE_MODELS`` (e.g. ``"yolo11n"``).

	    Raises:
	        KeyError: If ``selected_model`` is not a key of ``AVAILABLE_MODELS``.
	    """
	    global model, current_model
	    if model is not None and current_model == selected_model:
	        return

	    repo_id, filename = AVAILABLE_MODELS[selected_model]
	    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
	    model = YOLO(model_path)
	    # Record the loaded size only after the load succeeded. Without this
	    # update the "already loaded" check compares against a stale name and
	    # can keep serving the wrong weights after the user switches models.
	    current_model = selected_model

	def model_runner(image, conf=0.25, iou=0.45):
	    """Run the currently loaded model on one image and return it annotated.

	    Args:
	        image: Prediction source handed to ``model.predict``; callers in this
	            file pass either a PIL image or an image file path.
	        conf: Confidence threshold forwarded to ``model.predict``.
	        iou: IOU threshold forwarded to ``model.predict``.

	    Returns:
	        The annotated image of the first result, as an array in RGB channel
	        order so it can be displayed directly.
	    """
	    result = model.predict(
	        source=image,
	        save=False,
	        verbose=False,
	        conf=conf,
	        iou=iou,
	        imgsz=1280,
	    )
	    # Per the Ultralytics docs, Results.plot() returns a BGR-ordered array;
	    # reverse the channel axis so red and blue are not swapped on display.
	    return result[0].plot()[..., ::-1]

	def process_input(selected_model, pdf_input, image_input, conf=0.25, iou=0.45):
	    """Run layout detection on an uploaded PDF or image.

	    A PDF takes precedence when both inputs are supplied and the PDF path has
	    a ``.pdf`` extension; otherwise the image is used. Extension checks are
	    case-insensitive.

	    Args:
	        selected_model: Key of ``AVAILABLE_MODELS`` naming the model to use.
	        pdf_input: Path of the uploaded PDF, or ``None``.
	        image_input: Path of the uploaded image, or ``None``.
	        conf: Confidence threshold passed on to ``model_runner``.
	        iou: IOU threshold passed on to ``model_runner``.

	    Returns:
	        A list of ``(annotated_image, caption)`` tuples, one per processed
	        page, captioned ``"Page 1"``, ``"Page 2"``, ...

	    Raises:
	        gr.Error: If no file was provided or the file type is unsupported.
	    """
	    if pdf_input is None and image_input is None:
	        # gr.Error must be raised (not returned) for Gradio to display it.
	        raise gr.Error("Please upload a PDF or an image file.")

	    use_pdf = pdf_input is not None and pdf_input.lower().endswith(".pdf")
	    use_image = (
	        not use_pdf
	        and image_input is not None
	        and image_input.lower().endswith((".png", ".jpg", ".jpeg"))
	    )
	    if not (use_pdf or use_image):
	        raise gr.Error(
	            "Unsupported file type. Please upload a PDF or an image file "
	            "with .pdf, .png, .jpg or .jpeg extension."
	        )

	    load_model(selected_model)

	    if use_pdf:
	        # The context manager closes the document even if inference fails.
	        with pymupdf.open(pdf_input) as doc:
	            pages = [
	                # dpi=200: for an A4 page this yields a width above 1400 px.
	                model_runner(page.get_pixmap(dpi=200).pil_image(), conf, iou)
	                for page in doc
	            ]
	    else:
	        pages = [model_runner(image_input, conf, iou)]

	    return [(page, f"Page {number}") for number, page in enumerate(pages, start=1)]

	# Gradio UI: inputs on the left, results and model settings on the right.
	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from a capture that lost its indentation -- confirm the intended placement
	# of the buttons and the settings group.
	with gr.Blocks() as demo:
	    gr.Markdown("# YOLO11 Document Layout 🔎📄")
	    gr.Markdown(
	        """
	Detects layout elements in documents (PDFs or images) using YOLOv11 models and the Ultralytics library.
	Upload a PDF or an image, select a model size, and click "Run" to see the detected layout elements.
	- Finetuned models available at [Armaggheddon/yolo11-document-layout](https://huggingface.co/Armaggheddon/yolo11-document-layout)
	- More available in the [GitHub Repository](https://github.com/Armaggheddon/yolo11_doc_layout)
	"""
	    )
	    with gr.Row():
	        with gr.Column():
	            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], file_count="single")
	            image_input = gr.Image(label="Upload Image", type="filepath")
	            clear_button = gr.Button("Clear")
	            run_button = gr.Button("Run", variant="primary")
	        with gr.Column():
	            outputs = gr.Gallery(label="Output Image")
	            with gr.Group():
	                model_name = gr.Dropdown(
	                    list(AVAILABLE_MODELS.keys()),
	                    value="yolo11n",
	                    label="Model size",
	                )
	                conf = gr.Slider(0, 1, value=0.25, step=0.01, label="Confidence threshold")
	                iou = gr.Slider(0, 1, value=0.45, step=0.01, label="IOU threshold")

	    examples = gr.Examples(
	        examples=[[str(p), "yolo11n"] for p in IMAGE_SAMPLES],
	        inputs=[image_input, model_name],
	        cache_examples=False,
	        # The example inputs are (image, model size), which is not the
	        # parameter order of process_input; adapt them so the function
	        # receives the right arguments if examples are ever executed.
	        fn=lambda image_path, selected: process_input(selected, None, image_path),
	        outputs=outputs,
	    )

	    run_button.click(
	        fn=process_input,
	        inputs=[model_name, pdf_input, image_input, conf, iou],
	        outputs=outputs,
	    )

	    # Reset both upload widgets and the result gallery.
	    clear_button.click(
	        fn=lambda: (None, None, None),
	        inputs=[],
	        outputs=[pdf_input, image_input, outputs],
	    )

	demo.launch()