File size: 3,944 Bytes
81fc526 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from pathlib import Path
import gradio as gr
import pymupdf
from ultralytics import YOLO
from PIL import Image
from huggingface_hub import hf_hub_download
# Folder named "samples" that sits next to this file.
SAMPLES = Path(__file__).parent.joinpath("samples")

# Example pages image1.png .. image4.png, in numeric order.
IMAGE_SAMPLES = [SAMPLES.joinpath(f"image{index}.png") for index in range(1, 5)]
# Hugging Face Hub repository shared by every checkpoint listed below.
_MODEL_REPO = "Armaggheddon/yolo11-document-layout"

# Maps a model name to its (repo_id, checkpoint filename) pair.
AVAILABLE_MODELS = {
    f"yolo11{size}": (_MODEL_REPO, f"yolo11{size}_doc_layout.pt")
    for size in ("n", "s", "m")
}

# Model name that load_model() compares the requested name against.
current_model = "yolo11n"
# Loaded YOLO instance; remains None until load_model() assigns one.
model = None
def load_model(selected_model):
    """Make sure the module-level ``model`` is the checkpoint named ``selected_model``.

    The checkpoint is fetched with ``hf_hub_download`` and wrapped in ``YOLO``
    only when no model is loaded yet or a different one was requested.

    Args:
        selected_model: A key of ``AVAILABLE_MODELS``.

    Raises:
        KeyError: If ``selected_model`` is not a key of ``AVAILABLE_MODELS``.
    """
    global model, current_model

    # Nothing to do when the requested checkpoint is already in memory.
    if model is not None and current_model == selected_model:
        return

    repo_id, filename = AVAILABLE_MODELS[selected_model]
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    model = YOLO(model_path)
    # Record which checkpoint is loaded. Without this the comparison above
    # always ran against the initial name, so switching back to that name
    # kept the previously loaded model, and any other name reloaded each call.
    current_model = selected_model
def model_runner(image, conf=0.25, iou=0.45, imgsz=1280):
    """Run the loaded model on one image and return the annotated picture.

    ``load_model`` must have been called first so that the module-level
    ``model`` is set.

    Args:
        image: Value passed to ``model.predict`` as ``source``; callers in
            this file pass a PIL image or an image file path.
        conf: Confidence threshold forwarded to ``predict``.
        iou: IoU threshold forwarded to ``predict``.
        imgsz: Inference size forwarded to ``predict``. Defaults to 1280,
            the value that used to be hard-coded.

    Returns:
        The first result rendered by ``plot()``, with channels in RGB order.
    """
    result = model.predict(
        source=image,
        save=False,
        verbose=False,
        conf=conf,
        iou=iou,
        imgsz=imgsz,
    )
    # Ultralytics documents plot() as returning a BGR array, while the Gradio
    # gallery interprets arrays as RGB; reverse the channel axis so the
    # colours are not swapped in the UI.
    return result[0].plot()[..., ::-1]
def process_input(selected_model, pdf_input, image_input, conf=0.25, iou=0.45):
    """Run layout detection on an uploaded PDF or image.

    When both inputs are supplied, the PDF takes precedence.

    Args:
        selected_model: Model name handed to ``load_model``.
        pdf_input: Path of the uploaded PDF, or ``None``.
            NOTE(review): assumes the File component delivers a path string
            (the code calls ``str`` methods on it) - confirm for the Gradio
            version in use.
        image_input: Path of the uploaded image, or ``None``.
        conf: Confidence threshold forwarded to ``model_runner``.
        iou: IoU threshold forwarded to ``model_runner``.

    Returns:
        A list of ``(annotated_image, "Page N")`` tuples, one per PDF page,
        or a single entry for an image.

    Raises:
        gr.Error: If nothing was uploaded or the file extension is not
            supported.
    """
    if pdf_input is None and image_input is None:
        # gr.Error has to be raised, not returned, to be reported as an error.
        raise gr.Error("Please upload a PDF or an image file.")

    load_model(selected_model)

    pages = []
    # Extensions are compared case-insensitively so "SCAN.PDF" is accepted too.
    if pdf_input is not None and pdf_input.lower().endswith(".pdf"):
        # The context manager closes the document even if inference fails.
        with pymupdf.open(pdf_input) as doc:
            for page in doc:
                pix = page.get_pixmap(dpi=200)  # if A4 should result in above 1400px width
                pages.append(model_runner(pix.pil_image(), conf=conf, iou=iou))
    elif image_input is not None and image_input.lower().endswith((".png", ".jpg", ".jpeg")):
        pages.append(model_runner(image_input, conf=conf, iou=iou))
    else:
        raise gr.Error(
            "Unsupported file type. Please upload a PDF or an image file "
            "with .pdf, .png, .jpg or .jpeg extension."
        )

    # Return a concrete list rather than a one-shot generator object.
    return [(page, f"Page {i+1}") for i, page in enumerate(pages)]
# Build the Gradio interface and start the app.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost - confirm in particular that the settings Group
# belongs inside the second column.
with gr.Blocks() as demo:
    gr.Markdown("# YOLO11 Document Layout ππ")
    gr.Markdown(
        "\n"
        "Detects layout elements in documents (PDFs or images) using YOLOv11 models and the Ultralytics library.\n"
        'Upload a PDF or an image, select a model size, and click "Run" to see the detected layout elements.\n'
        "- Finetuned models available at [Armaggheddon/yolo11-document-layout](https://huggingface.co/Armaggheddon/yolo11-document-layout)\n"
        "- More available in the [GitHub Repository](https://github.com/Armaggheddon/yolo11_doc_layout)\n"
    )

    with gr.Row():
        # First column: file inputs and action buttons.
        with gr.Column():
            pdf_input = gr.File(
                file_count="single",
                file_types=[".pdf"],
                label="Upload PDF",
            )
            image_input = gr.Image(type="filepath", label="Upload Image")
            clear_button = gr.Button("Clear")
            run_button = gr.Button("Run", variant="primary")

        # Second column: result gallery and inference settings.
        with gr.Column():
            outputs = gr.Gallery(label="Output Image")
            with gr.Group():
                model_name = gr.Dropdown(
                    list(AVAILABLE_MODELS),
                    label="Model size",
                    value="yolo11n",
                )
                conf = gr.Slider(0, 1, step=0.01, value=0.25, label="Confidence threshold")
                iou = gr.Slider(0, 1, step=0.01, value=0.45, label="IOU threshold")

    # Each example row supplies a sample image path and the model name.
    examples = gr.Examples(
        examples=[[str(sample), "yolo11n"] for sample in IMAGE_SAMPLES],
        inputs=[image_input, model_name],
        outputs=outputs,
        fn=process_input,
        cache_examples=False,
    )

    # "Run" sends the model choice, both uploads and both thresholds to process_input.
    run_button.click(
        fn=process_input,
        inputs=[model_name, pdf_input, image_input, conf, iou],
        outputs=outputs,
    )

    # "Clear" resets both upload widgets and the gallery.
    clear_button.click(
        fn=lambda: (None, None, None),
        inputs=[],
        outputs=[pdf_input, image_input, outputs],
    )

demo.launch()
|