Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| import gradio as gr | |
| import pymupdf | |
| from ultralytics import YOLO | |
| from PIL import Image | |
| from huggingface_hub import hf_hub_download | |
# Folder with the bundled example images, located next to this file.
SAMPLES = Path(__file__).parent.joinpath("samples")

# Example images offered in the UI.
IMAGE_SAMPLES = [
    SAMPLES / file_name
    for file_name in ("image1.png", "image2.png", "image3.png", "image4.png")
]

# Model size -> (Hugging Face repo id, weights filename inside that repo).
AVAILABLE_MODELS = {
    size: ("Armaggheddon/yolo11-document-layout", f"{size}_doc_layout.pt")
    for size in ("yolo11n", "yolo11s", "yolo11m")
}

# Key of the model size considered active, and the loaded model object
# (None until load_model() has run).
current_model = "yolo11n"
model = None
def load_model(selected_model):
    """Make ``selected_model`` the active module-level ``model``.

    The weights file is fetched with ``hf_hub_download`` and wrapped in
    ``YOLO``. Nothing is reloaded when the requested size is already the
    active one.

    Args:
        selected_model: A key of ``AVAILABLE_MODELS``.

    Raises:
        KeyError: If ``selected_model`` is not a key of ``AVAILABLE_MODELS``.
    """
    global model, current_model
    if model is not None and current_model == selected_model:
        return
    repo_id, filename = AVAILABLE_MODELS[selected_model]
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    model = YOLO(model_path)
    # Record the active size only after loading succeeded. Without this
    # update the cache check above compares against a stale name and can
    # keep serving the previously loaded weights.
    current_model = selected_model
def model_runner(image, conf=0.25, iou=0.45):
    """Run the active model on one image and return the annotated result.

    Args:
        image: Anything ``model.predict`` accepts as ``source``; the callers
            in this file pass a PIL image or an image file path.
        conf: Confidence threshold forwarded to ``predict``.
        iou: IoU threshold forwarded to ``predict``.

    Returns:
        The first result's annotated image as an RGB array.
    """
    result = model.predict(source=image, save=False, verbose=False, conf=conf, iou=iou, imgsz=1280)
    # Results.plot() produces a BGR array; flip the channel axis so RGB
    # consumers (the Gradio gallery, PIL) show the right colours. copy()
    # gives a contiguous array instead of a negative-stride view.
    return result[0].plot()[..., ::-1].copy()
def process_input(selected_model, pdf_input, image_input, conf=0.25, iou=0.45):
    """Run layout detection on an uploaded PDF or image.

    A PDF takes precedence over an image when both are supplied. Each PDF
    page is rendered to an image and analysed separately; an image yields a
    single entry.

    Args:
        selected_model: A key of ``AVAILABLE_MODELS``.
        pdf_input: Path of the uploaded PDF, or None.
        image_input: Path of the uploaded image, or None.
        conf: Confidence threshold forwarded to ``model_runner``.
        iou: IoU threshold forwarded to ``model_runner``.

    Returns:
        A list of ``(annotated_image, caption)`` tuples, one per page.

    Raises:
        gr.Error: If nothing was uploaded or the file type is unsupported.
    """
    if pdf_input is None and image_input is None:
        # gr.Error only reaches the user when raised, not when returned.
        raise gr.Error("Please upload a PDF or an image file.")

    # Compare extensions case-insensitively so "SCAN.PDF" or "photo.JPG" work.
    is_pdf = pdf_input is not None and pdf_input.lower().endswith(".pdf")
    is_image = image_input is not None and image_input.lower().endswith((".png", ".jpg", ".jpeg"))
    if not (is_pdf or is_image):
        raise gr.Error("Unsupported file type. Please upload a PDF or an image file with .pdf, .png, .jpg or .jpeg extension.")

    load_model(selected_model)
    pages = []
    if is_pdf:
        # The context manager closes the document even if inference fails.
        with pymupdf.open(pdf_input) as doc:
            for page in doc:
                # At 200 dpi an A4 page comes out wider than 1400 px.
                pix = page.get_pixmap(dpi=200)
                pages.append(model_runner(pix.pil_image(), conf=conf, iou=iou))
    else:
        pages.append(model_runner(image_input, conf=conf, iou=iou))

    # Return a concrete list rather than a one-shot generator.
    return [(page, f"Page {i + 1}") for i, page in enumerate(pages)]
# UI definition: inputs and action buttons on the left, results and model
# settings on the right, example images below.
# NOTE(review): the nesting of the layout containers below is reconstructed
# from the order of the components; confirm the Group placement in particular.
with gr.Blocks() as demo:
    # NOTE(review): the two trailing characters of the title look like
    # mis-encoded emoji; confirm the intended text.
    gr.Markdown("# YOLO11 Document Layout ππ")
    gr.Markdown(
        """
        Detects layout elements in documents (PDFs or images) using YOLOv11 models and the Ultralytics library.
        Upload a PDF or an image, select a model size, and click "Run" to see the detected layout elements.
        - Finetuned models available at [Armaggheddon/yolo11-document-layout](https://huggingface.co/Armaggheddon/yolo11-document-layout)
        - More available in the [GitHub Repository](https://github.com/Armaggheddon/yolo11_doc_layout)
        """
    )
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], file_count="single")
            image_input = gr.Image(label="Upload Image", type="filepath")
            clear_button = gr.Button("Clear")
            run_button = gr.Button("Run", variant="primary")
        with gr.Column():
            outputs = gr.Gallery(label="Output Image")
            with gr.Group():
                model_name = gr.Dropdown(
                    list(AVAILABLE_MODELS.keys()),
                    value="yolo11n",
                    label="Model size",
                )
                conf = gr.Slider(0, 1, value=0.25, step=0.01, label="Confidence threshold")
                iou = gr.Slider(0, 1, value=0.45, step=0.01, label="IOU threshold")
    examples = gr.Examples(
        examples=[[str(p), "yolo11n"] for p in IMAGE_SAMPLES],
        inputs=[image_input, model_name],
        cache_examples=False,
        # Example rows are (image, model) while process_input expects
        # (model, pdf, image, ...); adapt the order so the callback is
        # correct if example caching or run-on-click is ever enabled.
        fn=lambda image, model_size: process_input(model_size, None, image),
        outputs=outputs,
    )
    run_button.click(
        fn=process_input,
        inputs=[model_name, pdf_input, image_input, conf, iou],
        outputs=outputs,
    )
    # Reset both upload widgets and the gallery.
    clear_button.click(
        fn=lambda: (None, None, None),
        inputs=[],
        outputs=[pdf_input, image_input, outputs],
    )

demo.launch()