# KlareKiste's picture
# Update app.py
# 9eac079 verified
import os, time, json
import numpy as np
import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont
import cv2
from transformers import AutoImageProcessor, RTDetrForObjectDetection
from ultralytics import YOLO
from huggingface_hub import hf_hub_download
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Module-level model handles; they stay None until load_models() assigns them.
rtdetr_r50_model = rtdetr_r50_processor = yolo_o365_model = None
def load_models():
    """Load both detectors into the module-level globals and report the outcome.

    The RT-DETR checkpoint (model and image processor) is loaded first and the
    model is moved to ``DEVICE``. Afterwards the YOLO weights are fetched with
    ``hf_hub_download`` and opened with ``YOLO``. Loading stops at the first
    failure, so YOLO is not attempted when RT-DETR could not be loaded.

    Returns:
        A German status string: either the success message or an error message
        naming the model whose loading raised an ``Exception``.
    """
    global rtdetr_r50_model, rtdetr_r50_processor, yolo_o365_model

    rtdetr_checkpoint = "PekingU/rtdetr_r50vd_coco_o365"
    print("Lade RT-DETR R50 (COCO 80 Klassen)...")
    try:
        detector = RTDetrForObjectDetection.from_pretrained(rtdetr_checkpoint)
        rtdetr_r50_model = detector.to(DEVICE)
        rtdetr_r50_processor = AutoImageProcessor.from_pretrained(rtdetr_checkpoint)
        class_total = len(rtdetr_r50_model.config.id2label)
        print(f"RT-DETR R50 geladen - kennt {class_total} Klassen")
    except Exception as exc:
        return f"Fehler beim Laden von RT-DETR R50: {exc!s}"

    print("Lade YOLO11n (Objects365 - 365 Klassen)...")
    # Substrings used to pick out bathroom-related class names for the debug print.
    bathroom_keywords = ['toilet', 'sink', 'faucet', 'mirror', 'towel', 'bath', 'shower']
    try:
        checkpoint_file = hf_hub_download("NRtred/yolo11n_object365", "yolo11n_object365.pt")
        yolo_o365_model = YOLO(checkpoint_file)
        print(f"YOLO11n geladen - kennt {len(yolo_o365_model.names)} Klassen")

        # Debug output: show the first few YOLO class names.
        first_names = list(yolo_o365_model.names.values())[:30]
        print(f"Erste 30 YOLO Labels: {first_names}")

        # Debug output: list the YOLO class names that look bathroom-related.
        bathroom_names = []
        for class_name in yolo_o365_model.names.values():
            lowered = class_name.lower()
            if any(keyword in lowered for keyword in bathroom_keywords):
                bathroom_names.append(class_name)
        print(f"YOLO Badezimmer-Labels: {bathroom_names}")
    except Exception as exc:
        return f"Fehler beim Laden von YOLO11n: {exc!s}"

    return "Beide Modelle geladen! RT-DETR R50 (80 COCO) und YOLO11n (365 Objects365)"
def detect_with_rtdetr(image: Image.Image, model, processor, confidence_threshold=0.25):
    """Run an RT-DETR style detector on one image and draw what it finds.

    Args:
        image: PIL image to analyse. Drawing happens on a copy.
        model: Detection model; it is called with the processor output, and
            ``model.config.id2label`` supplies class names when it exists.
        processor: Image processor used for preprocessing and for
            ``post_process_object_detection``.
        confidence_threshold: Score threshold handed to post-processing.

    Returns:
        A tuple ``(annotated, detections, seconds)``: the annotated image copy,
        a list of dicts with ``label``, ``confidence`` (rounded to 3 places)
        and ``bbox`` (``[x1, y1, x2, y2]`` truncated to ints), and the elapsed
        wall-clock time including drawing.
    """
    started_at = time.time()

    batch = processor(images=image, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        raw_outputs = model(**batch)

    # PIL reports (width, height); the reversed (height, width) is passed on.
    width, height = image.size
    sizes = torch.tensor([(height, width)], device=DEVICE)
    processed = processor.post_process_object_detection(
        raw_outputs,
        target_sizes=sizes,
        threshold=float(confidence_threshold),
    )
    first = processed[0]

    class_names = getattr(model.config, "id2label", {})
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    color = "red"
    found = []

    scores = first["scores"].tolist()
    class_ids = first["labels"].tolist()
    corners = first["boxes"].tolist()
    for confidence, class_id, corner in zip(scores, class_ids, corners):
        left, top, right, bottom = (float(value) for value in corner)
        confidence = float(confidence)
        class_id = int(class_id)
        # Fall back to a synthetic name when the id is missing from the label map.
        name = class_names.get(class_id, f"class_{class_id}")

        found.append({
            "label": name,
            "confidence": round(confidence, 3),
            "bbox": [int(left), int(top), int(right), int(bottom)],
        })
        pen.rectangle([left, top, right, bottom], outline=color, width=3)
        # Place the caption just above the box, clamped to the top edge.
        pen.text((left, max(0, top - 14)), f"{name}: {confidence:.2f}", fill=color)

    elapsed = time.time() - started_at
    return canvas, found, elapsed
def detect_with_yolo(image: Image.Image, model, confidence_threshold=0.25):
    """Run a YOLO model on one image and draw what it finds.

    Args:
        image: PIL image to analyse. Drawing happens on a copy.
        model: Callable YOLO model; ``model.names`` maps class ids to names.
        confidence_threshold: Passed to the model as ``conf``.

    Returns:
        A tuple ``(annotated, detections, seconds)``: the annotated image copy,
        a list of dicts with ``label``, ``confidence`` (rounded to 3 places)
        and ``bbox`` (``[x1, y1, x2, y2]`` truncated to ints), and the elapsed
        wall-clock time including drawing.
    """
    started_at = time.time()

    # Inference runs on the device type ("cuda" or "cpu") chosen for the app.
    predictions = model(image, conf=confidence_threshold, device=DEVICE.type)

    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    color = "blue"
    found = []

    for prediction in predictions:
        candidates = prediction.boxes
        if candidates is None:
            continue
        for candidate in candidates:
            left, top, right, bottom = candidate.xyxy[0].tolist()
            confidence = float(candidate.conf[0])
            class_id = int(candidate.cls[0])
            name = model.names[class_id]

            found.append({
                "label": name,
                "confidence": round(confidence, 3),
                "bbox": [int(left), int(top), int(right), int(bottom)],
            })
            pen.rectangle([left, top, right, bottom], outline=color, width=3)
            # Place the caption just above the box, clamped to the top edge.
            pen.text((left, max(0, top - 14)), f"{name}: {confidence:.2f}", fill=color)

    elapsed = time.time() - started_at
    return canvas, found, elapsed
def _count_labels(detections):
    """Count detections per class label, ignoring case and surrounding spaces."""
    from collections import Counter  # function-scope import keeps the file's import block untouched

    # NOTE(review): the two models presumably spell shared classes with
    # different capitalisation (e.g. "Toilet" vs "toilet"); labels are
    # normalised so such classes are matched — confirm against both label maps.
    return Counter(str(det["label"]).strip().lower() for det in detections)


def _speed_line(rtdetr_t, yolo_t):
    """Build the Markdown bullet stating which model was faster and by how much."""
    quicker, slower = sorted((rtdetr_t, yolo_t))
    if quicker <= 0:
        # A zero duration would make the ratio a division by zero.
        return "- **Geschwindigkeitsfaktor:** nicht messbar\n\n"
    faster_name = 'RT-DETR' if rtdetr_t < yolo_t else 'YOLO'
    return f"- **Geschwindigkeitsfaktor:** {faster_name} ist {slower / quicker:.2f}x schneller\n\n"


def compare_models(image: Image.Image, confidence_threshold: float):
    """Run both detectors on one image and build a Markdown comparison report.

    Args:
        image: PIL image to analyse, or ``None`` when nothing was uploaded.
        confidence_threshold: Score threshold forwarded to both detectors.

    Returns:
        A tuple ``(rtdetr_image, yolo_image, markdown)``. When no image is
        given, or the module-level models have not been loaded, both images
        are ``None`` and the third element is a short German message.
    """
    if image is None:
        return None, None, "Bitte lade ein Bild hoch."
    # load_models() reports failures as a string and leaves the globals unset;
    # answer with a readable message instead of failing inside the detectors.
    if rtdetr_r50_model is None or rtdetr_r50_processor is None or yolo_o365_model is None:
        return None, None, "Modelle sind nicht geladen. Bitte die Start-Logs prüfen."

    # RT-DETR R50 detection (80 COCO classes)
    rtdetr_img, rtdetr_det, rtdetr_t = detect_with_rtdetr(
        image, rtdetr_r50_model, rtdetr_r50_processor, confidence_threshold
    )
    # YOLO11n detection (365 Objects365 classes)
    yolo_img, yolo_det, yolo_t = detect_with_yolo(
        image, yolo_o365_model, confidence_threshold
    )

    # Per-label counts for each model.
    rtdetr_objects = _count_labels(rtdetr_det)
    yolo_objects = _count_labels(yolo_det)

    # Sorted so the report and the JSON are identical from run to run.
    all_labels = sorted(rtdetr_objects.keys() | yolo_objects.keys())
    nur_rtdetr = sorted(rtdetr_objects.keys() - yolo_objects.keys())
    nur_yolo = sorted(yolo_objects.keys() - rtdetr_objects.keys())
    beide = sorted(rtdetr_objects.keys() & yolo_objects.keys())

    # Comparison table: one row per label seen by either model.
    table_rows = [
        "| Objekt | RT-DETR (80) | YOLO (365) | Anmerkung |\n",
        "|--------|--------------|------------|----------|\n",
    ]
    for label in all_labels:
        rtdetr_count = rtdetr_objects[label]  # Counter yields 0 for missing labels
        yolo_count = yolo_objects[label]
        if yolo_count == 0:
            note = "⚠️ Nur COCO"
        elif rtdetr_count == 0:
            note = "✨ O365 Extra"
        else:
            note = ""
        table_rows.append(f"| {label} | {rtdetr_count} | {yolo_count} | {note} |\n")
    comparison_table = "".join(table_rows)

    # Full JSON dump of both result sets.
    full_json = json.dumps({
        "rtdetr_r50": {
            "total_objects": len(rtdetr_det),
            "inference_time_ms": round(rtdetr_t * 1000, 1),
            "unique_finds": nur_rtdetr,
            "available_classes": 80,
            "all_detections": rtdetr_det,
        },
        "yolo11n_o365": {
            "total_objects": len(yolo_det),
            "inference_time_ms": round(yolo_t * 1000, 1),
            "unique_finds": nur_yolo,
            "available_classes": 365,
            "all_detections": yolo_det,
        },
        "gemeinsam": {
            "beide_gefunden": beide,
            "anzahl_uebereinstimmungen": len(beide),
        },
    }, ensure_ascii=False, indent=2)

    # Markdown report, assembled from parts and joined once.
    parts = [
        "## Modellvergleich: RT-DETR R50 (COCO) vs YOLO11n (Objects365)\n\n",
        "### Klassen-Unterschied\n",
        "- **RT-DETR R50:** 80 COCO Klassen (Standard-Objekte)\n",
        "- **YOLO11n:** 365 Objects365 Klassen (detaillierte Objekterkennung)\n\n",
        "### Zusammenfassung\n",
        f"- **RT-DETR R50:** {len(rtdetr_det)} Objekte in {rtdetr_t*1000:.1f}ms\n",
        f"- **YOLO11n O365:** {len(yolo_det)} Objekte in {yolo_t*1000:.1f}ms\n",
        _speed_line(rtdetr_t, yolo_t),
        "### Exklusive Funde\n",
        f"- **Nur RT-DETR (COCO):** {', '.join(nur_rtdetr) if nur_rtdetr else 'Keine'}\n",
        f"- **Nur YOLO (O365 Extra):** {', '.join(nur_yolo) if nur_yolo else 'Keine'}\n",
        f"- **Beide gefunden:** {len(beide)} gemeinsame Objekttypen\n\n",
        "### Detaillierter Vergleich\n",
        comparison_table + "\n",
        "### Modell-Eigenschaften\n",
        "- **RT-DETR:** Transformer-basiert, End-to-End Detection, COCO-fokussiert\n",
        "- **YOLO11n:** CNN-basiert, ultraschnell, 365 detaillierte Objektklassen\n\n",
        "### Alle Erkennungen (JSON)\n",
        "<details>\n",
        "<summary>Klick für vollständige Daten</summary>\n\n",
        "```json\n",
        full_json,
        "\n```\n",
        "</details>\n",
    ]
    md = "".join(parts)

    return rtdetr_img, yolo_img, md
# Load the models at startup.
# load_models() reports both success and load errors as a status string, so the
# result is printed rather than checked.
print("Starte Modell-Ladevorgang...")
load_status = load_models()
print(load_status)
# Gradio interface
# NOTE(review): the nesting of the layout blocks below is reconstructed
# (input column in the first row, both result images side by side in a second
# row, report underneath) — confirm against the intended layout.
with gr.Blocks(title="RT-DETR vs YOLO Vergleich") as demo:
    gr.Markdown("# 🔍 Objekterkennung: RT-DETR (80 COCO) vs YOLO11n (365 Objects365)")
    gr.Markdown("Vergleiche RT-DETR mit Standard COCO gegen YOLO mit erweitertem Objects365 Datensatz")
    with gr.Row():
        with gr.Column():
            # Inputs: the image is delivered to the callback as a PIL image.
            input_image = gr.Image(label="Eingabebild", type="pil")
            confidence_slider = gr.Slider(
                minimum=0.1,
                maximum=0.9,
                value=0.25,
                step=0.05,
                label="Confidence Threshold"
            )
            detect_btn = gr.Button("🚀 Modelle vergleichen", variant="primary")
    with gr.Row():
        # Outputs: one annotated image per model.
        rtdetr_output = gr.Image(label="RT-DETR R50 (80 COCO Klassen)")
        yolo_output = gr.Image(label="YOLO11n (365 Objects365 Klassen)")
    # Markdown report returned by compare_models.
    analysis_output = gr.Markdown(label="Vergleichsanalyse")
    # Button event: run the comparison on the uploaded image and threshold.
    detect_btn.click(
        fn=compare_models,
        inputs=[input_image, confidence_slider],
        outputs=[rtdetr_output, yolo_output, analysis_output]
    )
    # Examples: (image path, threshold) pairs for the two inputs; caching is off.
    # NOTE(review): presumably example1.jpg / example2.jpg ship next to this
    # script — verify that the files exist.
    gr.Examples(
        examples=[
            ["example1.jpg", 0.25],
            ["example2.jpg", 0.3],
        ],
        inputs=[input_image, confidence_slider],
        outputs=[rtdetr_output, yolo_output, analysis_output],
        fn=compare_models,
        cache_examples=False
    )
# Start the app, but only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()