Spaces:
Sleeping
Sleeping
| import os, time, json | |
| import numpy as np | |
| import gradio as gr | |
| import torch | |
| from PIL import Image, ImageDraw, ImageFont | |
| import cv2 | |
| from transformers import AutoImageProcessor, RTDetrForObjectDetection | |
| from ultralytics import YOLO | |
| from huggingface_hub import hf_hub_download | |
# Run inference on the GPU when CUDA is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Global model handles; they stay None until load_models() assigns them.
rtdetr_r50_model = rtdetr_r50_processor = yolo_o365_model = None
def load_models():
    """Load both detectors into the module-level globals and report the outcome.

    The RT-DETR model and its image processor are loaded first. Afterwards
    the YOLO weights are fetched via ``hf_hub_download`` and wrapped in a
    ``YOLO`` object. Progress and some debug information about the YOLO
    label set are printed along the way.

    Returns:
        A status message (German). When one of the two loading steps raises,
        the matching error message is returned immediately, so a failure in
        the RT-DETR step means the YOLO step is never attempted.
    """
    global rtdetr_r50_model, rtdetr_r50_processor, yolo_o365_model

    bathroom_keywords = ('toilet', 'sink', 'faucet', 'mirror', 'towel', 'bath', 'shower')
    checkpoint = "PekingU/rtdetr_r50vd_coco_o365"

    print("Lade RT-DETR R50 (COCO 80 Klassen)...")
    try:
        rtdetr_r50_model = RTDetrForObjectDetection.from_pretrained(checkpoint).to(DEVICE)
        rtdetr_r50_processor = AutoImageProcessor.from_pretrained(checkpoint)
        class_count = len(rtdetr_r50_model.config.id2label)
        print(f"RT-DETR R50 geladen - kennt {class_count} Klassen")
    except Exception as err:
        return f"Fehler beim Laden von RT-DETR R50: {err}"

    print("Lade YOLO11n (Objects365 - 365 Klassen)...")
    try:
        checkpoint_file = hf_hub_download("NRtred/yolo11n_object365", "yolo11n_object365.pt")
        yolo_o365_model = YOLO(checkpoint_file)
        print(f"YOLO11n geladen - kennt {len(yolo_o365_model.names)} Klassen")

        # Debug: show the first few YOLO labels.
        first_labels = list(yolo_o365_model.names.values())[:30]
        print(f"Erste 30 YOLO Labels: {first_labels}")

        # Debug: collect the YOLO labels that contain a bathroom keyword.
        bathroom_labels = []
        for name in yolo_o365_model.names.values():
            lowered = name.lower()
            if any(keyword in lowered for keyword in bathroom_keywords):
                bathroom_labels.append(name)
        print(f"YOLO Badezimmer-Labels: {bathroom_labels}")
    except Exception as err:
        return f"Fehler beim Laden von YOLO11n: {err}"

    return "Beide Modelle geladen! RT-DETR R50 (80 COCO) und YOLO11n (365 Objects365)"
def detect_with_rtdetr(image: Image.Image, model, processor, confidence_threshold=0.25):
    """Detect objects with an RT-DETR model and draw them onto a copy of *image*.

    Args:
        image: Input picture; it is not modified.
        model: Detection model, called with the processor's output.
        processor: Image processor used for pre- and post-processing.
        confidence_threshold: Score threshold handed to the post-processing.

    Returns:
        ``(annotated, detections, seconds)`` where ``annotated`` is the copy
        with red boxes and captions, ``detections`` is a list of dicts with
        the keys ``label``, ``confidence`` (rounded to 3 decimals) and
        ``bbox`` (``[x1, y1, x2, y2]`` truncated to ints), and ``seconds`` is
        the wall-clock time spent in this function, drawing included.
    """
    t0 = time.time()

    model_inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        raw_outputs = model(**model_inputs)

    # image.size is reversed before it is passed as the target size.
    sizes = torch.tensor([image.size[::-1]], device=DEVICE)
    result = processor.post_process_object_detection(
        raw_outputs,
        target_sizes=sizes,
        threshold=float(confidence_threshold),
    )[0]

    # Fall back to generic "class_<id>" names when the config has no label map.
    id2label = getattr(model.config, 'id2label', {})

    colour = "red"
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    found = []

    rows = zip(
        result["scores"].tolist(),
        result["labels"].tolist(),
        result["boxes"].tolist(),
    )
    for raw_score, raw_label, corners in rows:
        left, top, right, bottom = (float(value) for value in corners)
        score = float(raw_score)
        class_id = int(raw_label)
        name = id2label.get(class_id, f"class_{class_id}")

        found.append({
            "label": name,
            "confidence": round(score, 3),
            "bbox": [int(left), int(top), int(right), int(bottom)],
        })

        pen.rectangle([left, top, right, bottom], outline=colour, width=3)
        # Caption sits just above the box, clamped to the top image edge.
        pen.text((left, max(0, top - 14)), f"{name}: {score:.2f}", fill=colour)

    elapsed = time.time() - t0
    return canvas, found, elapsed
def detect_with_yolo(image: Image.Image, model, confidence_threshold=0.25):
    """Detect objects with a YOLO model and draw them onto a copy of *image*.

    Args:
        image: Input picture; it is not modified.
        model: Callable YOLO model that also exposes ``names`` for label lookup.
        confidence_threshold: Passed to the model as ``conf``.

    Returns:
        ``(annotated, detections, seconds)`` where ``annotated`` is the copy
        with blue boxes and captions, ``detections`` is a list of dicts with
        the keys ``label``, ``confidence`` (rounded to 3 decimals) and
        ``bbox`` (``[x1, y1, x2, y2]`` truncated to ints), and ``seconds`` is
        the wall-clock time spent in this function, drawing included.
    """
    t0 = time.time()

    predictions = model(image, conf=confidence_threshold, device=DEVICE.type)

    colour = "blue"
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    found = []

    for prediction in predictions:
        candidates = prediction.boxes
        if candidates is None:
            continue
        for candidate in candidates:
            left, top, right, bottom = candidate.xyxy[0].tolist()
            score = float(candidate.conf[0])
            class_id = int(candidate.cls[0])
            name = model.names[class_id]

            found.append({
                "label": name,
                "confidence": round(score, 3),
                "bbox": [int(left), int(top), int(right), int(bottom)],
            })

            pen.rectangle([left, top, right, bottom], outline=colour, width=3)
            # Caption sits just above the box, clamped to the top image edge.
            pen.text((left, max(0, top - 14)), f"{name}: {score:.2f}", fill=colour)

    elapsed = time.time() - t0
    return canvas, found, elapsed
def compare_models(image: Image.Image, confidence_threshold: float):
    """Run both detectors on *image* and build a Markdown comparison report.

    Args:
        image: Input picture, or ``None`` when nothing was uploaded.
        confidence_threshold: Score threshold forwarded to both detectors.

    Returns:
        ``(rtdetr_image, yolo_image, markdown)``. Both images are ``None``
        when no picture was supplied or when the models are not loaded; the
        Markdown string then only carries the explanation.
    """
    from collections import Counter  # local import keeps the block self-contained

    if image is None:
        return None, None, "Bitte lade ein Bild hoch."

    # load_models() reports failures through its return value instead of
    # raising, so the global handles may still be None at this point.
    if rtdetr_r50_model is None or rtdetr_r50_processor is None or yolo_o365_model is None:
        return None, None, "Modelle konnten nicht geladen werden. Bitte die Start-Logs prüfen."

    # RT-DETR R50 detection
    rtdetr_img, rtdetr_det, rtdetr_t = detect_with_rtdetr(
        image, rtdetr_r50_model, rtdetr_r50_processor, confidence_threshold
    )
    # YOLO11n detection
    yolo_img, yolo_det, yolo_t = detect_with_yolo(
        image, yolo_o365_model, confidence_threshold
    )

    # Count detections per label. Labels are compared case-insensitively so
    # that e.g. "Person" and "person" count as the same object type; the
    # per-detection entries in the JSON keep their original spelling.
    rtdetr_objects = Counter(str(d["label"]).lower() for d in rtdetr_det)
    yolo_objects = Counter(str(d["label"]).lower() for d in yolo_det)

    # Sorted lists instead of raw sets keep the report and JSON deterministic.
    all_labels = sorted(rtdetr_objects.keys() | yolo_objects.keys())
    nur_rtdetr = sorted(rtdetr_objects.keys() - yolo_objects.keys())
    nur_yolo = sorted(yolo_objects.keys() - rtdetr_objects.keys())
    beide = sorted(rtdetr_objects.keys() & yolo_objects.keys())

    # Comparison table
    table_rows = [
        "| Objekt | RT-DETR (80) | YOLO (365) | Anmerkung |\n",
        "|--------|--------------|------------|----------|\n",
    ]
    for label in all_labels:
        rtdetr_count = rtdetr_objects.get(label, 0)
        yolo_count = yolo_objects.get(label, 0)
        if yolo_count == 0:
            note = "⚠️ Nur COCO"
        elif rtdetr_count == 0:
            note = "✨ O365 Extra"
        else:
            note = ""
        table_rows.append(f"| {label} | {rtdetr_count} | {yolo_count} | {note} |\n")
    comparison_table = "".join(table_rows)

    # Full JSON output
    full_json = json.dumps({
        "rtdetr_r50": {
            "total_objects": len(rtdetr_det),
            "inference_time_ms": round(rtdetr_t * 1000, 1),
            "unique_finds": nur_rtdetr,
            "available_classes": 80,
            "all_detections": rtdetr_det
        },
        "yolo11n_o365": {
            "total_objects": len(yolo_det),
            "inference_time_ms": round(yolo_t * 1000, 1),
            "unique_finds": nur_yolo,
            "available_classes": 365,
            "all_detections": yolo_det
        },
        "gemeinsam": {
            "beide_gefunden": beide,
            "anzahl_uebereinstimmungen": len(beide)
        }
    }, ensure_ascii=False, indent=2)

    # Speed factor; guarded because a zero duration would divide by zero.
    quicker = min(rtdetr_t, yolo_t)
    if quicker > 0:
        winner = "RT-DETR" if rtdetr_t < yolo_t else "YOLO"
        factor = max(rtdetr_t, yolo_t) / quicker
        speed_line = f"- **Geschwindigkeitsfaktor:** {winner} ist {factor:.2f}x schneller\n\n"
    else:
        speed_line = "- **Geschwindigkeitsfaktor:** nicht messbar\n\n"

    only_rtdetr_text = ", ".join(nur_rtdetr) if nur_rtdetr else "Keine"
    only_yolo_text = ", ".join(nur_yolo) if nur_yolo else "Keine"

    # Markdown report
    md = "".join([
        "## Modellvergleich: RT-DETR R50 (COCO) vs YOLO11n (Objects365)\n\n",
        "### Klassen-Unterschied\n",
        "- **RT-DETR R50:** 80 COCO Klassen (Standard-Objekte)\n",
        "- **YOLO11n:** 365 Objects365 Klassen (detaillierte Objekterkennung)\n\n",
        "### Zusammenfassung\n",
        f"- **RT-DETR R50:** {len(rtdetr_det)} Objekte in {rtdetr_t*1000:.1f}ms\n",
        f"- **YOLO11n O365:** {len(yolo_det)} Objekte in {yolo_t*1000:.1f}ms\n",
        speed_line,
        "### Exklusive Funde\n",
        f"- **Nur RT-DETR (COCO):** {only_rtdetr_text}\n",
        f"- **Nur YOLO (O365 Extra):** {only_yolo_text}\n",
        f"- **Beide gefunden:** {len(beide)} gemeinsame Objekttypen\n\n",
        "### Detaillierter Vergleich\n",
        comparison_table + "\n",
        "### Modell-Eigenschaften\n",
        "- **RT-DETR:** Transformer-basiert, End-to-End Detection, COCO-fokussiert\n",
        "- **YOLO11n:** CNN-basiert, ultraschnell, 365 detaillierte Objektklassen\n\n",
        "### Alle Erkennungen (JSON)\n",
        "<details>\n",
        "<summary>Klick für vollständige Daten</summary>\n\n",
        "```json\n",
        full_json,
        "\n```\n",
        "</details>\n",
    ])

    return rtdetr_img, yolo_img, md
# Load the models once at start-up and print the resulting status message.
print("Starte Modell-Ladevorgang...")
load_status = load_models()
print(load_status)

# Gradio interface
# NOTE(review): the nesting below is reconstructed from a source whose
# indentation was lost - confirm the layout against the running app.
with gr.Blocks(title="RT-DETR vs YOLO Vergleich") as demo:
    gr.Markdown("# 🔍 Objekterkennung: RT-DETR (80 COCO) vs YOLO11n (365 Objects365)")
    gr.Markdown("Vergleiche RT-DETR mit Standard COCO gegen YOLO mit erweitertem Objects365 Datensatz")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Eingabebild", type="pil")
            confidence_slider = gr.Slider(
                minimum=0.1,
                maximum=0.9,
                value=0.25,
                step=0.05,
                label="Confidence Threshold"
            )
            detect_btn = gr.Button("🚀 Modelle vergleichen", variant="primary")

    with gr.Row():
        rtdetr_output = gr.Image(label="RT-DETR R50 (80 COCO Klassen)")
        yolo_output = gr.Image(label="YOLO11n (365 Objects365 Klassen)")

    analysis_output = gr.Markdown(label="Vergleichsanalyse")

    # Button event
    detect_btn.click(
        fn=compare_models,
        inputs=[input_image, confidence_slider],
        outputs=[rtdetr_output, yolo_output, analysis_output]
    )

    # Examples: only offer rows whose image file is actually present, and
    # skip the examples widget entirely when none of them is.
    example_rows = [
        row
        for row in (["example1.jpg", 0.25], ["example2.jpg", 0.3])
        if os.path.isfile(row[0])
    ]
    if example_rows:
        gr.Examples(
            examples=example_rows,
            inputs=[input_image, confidence_slider],
            outputs=[rtdetr_output, yolo_output, analysis_output],
            fn=compare_models,
            cache_examples=False
        )

# Start the app
if __name__ == "__main__":
    demo.launch()