import os, time, json

import numpy as np
import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont
import cv2
from transformers import AutoImageProcessor, RTDetrForObjectDetection
from ultralytics import YOLO
from huggingface_hub import hf_hub_download

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Global models
rtdetr_r50_model = None
rtdetr_r50_processor = None
yolo_o365_model = None


def load_models():
    global rtdetr_r50_model, rtdetr_r50_processor, yolo_o365_model

    print("Loading RT-DETR R50 (80 COCO classes)...")
    model_id_r50 = "PekingU/rtdetr_r50vd_coco_o365"
    try:
        rtdetr_r50_model = RTDetrForObjectDetection.from_pretrained(model_id_r50).to(DEVICE)
        rtdetr_r50_processor = AutoImageProcessor.from_pretrained(model_id_r50)
        print(f"RT-DETR R50 loaded - knows {len(rtdetr_r50_model.config.id2label)} classes")
    except Exception as e:
        return f"Error loading RT-DETR R50: {str(e)}"

    print("Loading YOLO11n (Objects365 - 365 classes)...")
    try:
        weights_path = hf_hub_download("NRtred/yolo11n_object365", "yolo11n_object365.pt")
        yolo_o365_model = YOLO(weights_path)
        print(f"YOLO11n loaded - knows {len(yolo_o365_model.names)} classes")

        # Debug: show a few of the YOLO labels
        yolo_labels = list(yolo_o365_model.names.values())[:30]
        print(f"First 30 YOLO labels: {yolo_labels}")

        # Look for bathroom-related labels in the YOLO label set
        bad_labels_yolo = [
            l for l in yolo_o365_model.names.values()
            if any(word in l.lower() for word in ['toilet', 'sink', 'faucet', 'mirror', 'towel', 'bath', 'shower'])
        ]
        print(f"YOLO bathroom labels: {bad_labels_yolo}")
    except Exception as e:
        return f"Error loading YOLO11n: {str(e)}"

    return "Both models loaded! RT-DETR R50 (80 COCO) and YOLO11n (365 Objects365)"


def detect_with_rtdetr(image: Image.Image, model, processor, confidence_threshold=0.25):
    start = time.time()
    inputs = processor(images=image, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        outputs = model(**inputs)

    target_sizes = torch.tensor([image.size[::-1]], device=DEVICE)
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=float(confidence_threshold)
    )[0]

    detections = []
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    id2label = model.config.id2label if hasattr(model.config, 'id2label') else {}
    box_color = "red"

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        x1, y1, x2, y2 = [float(x) for x in box.tolist()]
        confv = float(score.item())
        label_id = int(label.item())
        label_text = id2label.get(label_id, f"class_{label_id}")

        detections.append({
            "label": label_text,
            "confidence": round(confv, 3),
            "bbox": [int(x1), int(y1), int(x2), int(y2)]
        })

        draw.rectangle([x1, y1, x2, y2], outline=box_color, width=3)
        draw.text((x1, max(0, y1 - 14)), f"{label_text}: {confv:.2f}", fill=box_color)

    dur = time.time() - start
    return annotated, detections, dur


def detect_with_yolo(image: Image.Image, model, confidence_threshold=0.25):
    start = time.time()

    # YOLO inference
    results = model(image, conf=confidence_threshold, device=DEVICE.type)

    detections = []
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    box_color = "blue"

    for r in results:
        boxes = r.boxes
        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                conf = float(box.conf[0])
                cls = int(box.cls[0])
                label = model.names[cls]

                detections.append({
                    "label": label,
                    "confidence": round(conf, 3),
                    "bbox": [int(x1), int(y1), int(x2), int(y2)]
                })

                draw.rectangle([x1, y1, x2, y2], outline=box_color, width=3)
                draw.text((x1, max(0, y1 - 14)), f"{label}: {conf:.2f}", fill=box_color)

    dur = time.time() - start
    return annotated, detections, dur


def compare_models(image: Image.Image, confidence_threshold: float):
    if image is None:
        return None, None, "Please upload an image."

    # RT-DETR R50 detection (80 COCO classes)
    rtdetr_img, rtdetr_det, rtdetr_t = detect_with_rtdetr(
        image, rtdetr_r50_model, rtdetr_r50_processor, confidence_threshold
    )

    # YOLO11n detection (365 Objects365 classes)
    yolo_img, yolo_det, yolo_t = detect_with_yolo(
        image, yolo_o365_model, confidence_threshold
    )

    # Extract and count labels per model
    rtdetr_objects = {}
    for d in rtdetr_det:
        label = d["label"]
        if label not in rtdetr_objects:
            rtdetr_objects[label] = 0
        rtdetr_objects[label] += 1

    yolo_objects = {}
    for d in yolo_det:
        label = d["label"]
        if label not in yolo_objects:
            yolo_objects[label] = 0
        yolo_objects[label] += 1

    # All unique labels
    all_labels = set(list(rtdetr_objects.keys()) + list(yolo_objects.keys()))

    # Labels found by only one model / by both
    nur_rtdetr = set(rtdetr_objects.keys()) - set(yolo_objects.keys())
    nur_yolo = set(yolo_objects.keys()) - set(rtdetr_objects.keys())
    beide = set(rtdetr_objects.keys()) & set(yolo_objects.keys())

    # Comparison table
    comparison_table = "| Object | RT-DETR (80) | YOLO (365) | Note |\n"
    comparison_table += "|--------|--------------|------------|------|\n"
    for label in sorted(all_labels):
        rtdetr_count = rtdetr_objects.get(label, 0)
        yolo_count = yolo_objects.get(label, 0)
        note = ""
        if label in nur_rtdetr:
            note = "⚠️ COCO only"
        elif label in nur_yolo:
            note = "✨ O365 extra"
        comparison_table += f"| {label} | {rtdetr_count} | {yolo_count} | {note} |\n"

    # Full JSON output
    full_json = json.dumps({
        "rtdetr_r50": {
            "total_objects": len(rtdetr_det),
            "inference_time_ms": round(rtdetr_t * 1000, 1),
            "unique_finds": list(nur_rtdetr),
            "available_classes": 80,
            "all_detections": rtdetr_det
        },
        "yolo11n_o365": {
            "total_objects": len(yolo_det),
            "inference_time_ms": round(yolo_t * 1000, 1),
            "unique_finds": list(nur_yolo),
            "available_classes": 365,
            "all_detections": yolo_det
        },
        "shared": {
            "found_by_both": list(beide),
            "num_matches": len(beide)
        }
    }, ensure_ascii=False, indent=2)

    # Build the markdown report
    md = "## Model comparison: RT-DETR R50 (COCO) vs YOLO11n (Objects365)\n\n"

    md += "### Class-set difference\n"
    md += "- **RT-DETR R50:** 80 COCO classes (standard objects)\n"
    md += "- **YOLO11n:** 365 Objects365 classes (fine-grained object detection)\n\n"

    md += "### Summary\n"
    md += f"- **RT-DETR R50:** {len(rtdetr_det)} objects in {rtdetr_t*1000:.1f}ms\n"
    md += f"- **YOLO11n O365:** {len(yolo_det)} objects in {yolo_t*1000:.1f}ms\n"
    md += f"- **Speed factor:** {'RT-DETR' if rtdetr_t < yolo_t else 'YOLO'} is {max(rtdetr_t, yolo_t)/min(rtdetr_t, yolo_t):.2f}x faster\n\n"

    md += "### Exclusive finds\n"
    md += f"- **RT-DETR only (COCO):** {', '.join(nur_rtdetr) if nur_rtdetr else 'None'}\n"
    md += f"- **YOLO only (O365 extra):** {', '.join(nur_yolo) if nur_yolo else 'None'}\n"
    md += f"- **Found by both:** {len(beide)} shared object types\n\n"

    md += "### Detailed comparison\n"
    md += comparison_table + "\n"

    md += "### Model characteristics\n"
    md += "- **RT-DETR:** transformer-based, end-to-end detection, COCO-focused\n"
    md += "- **YOLO11n:** CNN-based, extremely fast, 365 fine-grained object classes\n\n"

    md += "### All detections (JSON)\n"
\n" md += "Klick für vollständige Daten\n\n" md += "```json\n" md += full_json md += "\n```\n" md += "
\n" return rtdetr_img, yolo_img, md # Modelle beim Start laden print("Starte Modell-Ladevorgang...") load_status = load_models() print(load_status) # Gradio Interface with gr.Blocks(title="RT-DETR vs YOLO Vergleich") as demo: gr.Markdown("# 🔍 Objekterkennung: RT-DETR (80 COCO) vs YOLO11n (365 Objects365)") gr.Markdown("Vergleiche RT-DETR mit Standard COCO gegen YOLO mit erweitertem Objects365 Datensatz") with gr.Row(): with gr.Column(): input_image = gr.Image(label="Eingabebild", type="pil") confidence_slider = gr.Slider( minimum=0.1, maximum=0.9, value=0.25, step=0.05, label="Confidence Threshold" ) detect_btn = gr.Button("🚀 Modelle vergleichen", variant="primary") with gr.Row(): rtdetr_output = gr.Image(label="RT-DETR R50 (80 COCO Klassen)") yolo_output = gr.Image(label="YOLO11n (365 Objects365 Klassen)") analysis_output = gr.Markdown(label="Vergleichsanalyse") # Button Event detect_btn.click( fn=compare_models, inputs=[input_image, confidence_slider], outputs=[rtdetr_output, yolo_output, analysis_output] ) # Beispiele gr.Examples( examples=[ ["example1.jpg", 0.25], ["example2.jpg", 0.3], ], inputs=[input_image, confidence_slider], outputs=[rtdetr_output, yolo_output, analysis_output], fn=compare_models, cache_examples=False ) # App starten if __name__ == "__main__": demo.launch()