doinglean's picture
Update app.py
dee9ee3 verified
raw
history blame
2.97 kB
import gradio as gr
from transformers import AutoModel, AutoImageProcessor
import cv2
import numpy as np
import torch
import os
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the API token from the environment.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    # Fail at start-up: without a token the script refuses to continue.
    raise ValueError("HF_TOKEN environment variable not set. Please add it in Space settings.")
login(HF_TOKEN)

# Load the model and its image processor from the same checkpoint.
_CHECKPOINT = "facebook/dinov3-convnext-small-pretrain-lvd1689m"
model = AutoModel.from_pretrained(_CHECKPOINT)
image_processor = AutoImageProcessor.from_pretrained(_CHECKPOINT)
# Contours with an area (in pixels) below this value are skipped as noise.
_MIN_CONTOUR_AREA = 100
# Maximum number of objects reported for a single image.
_MAX_OBJECTS = 10
# Lower-case phrases in the prompt that request an object description.
_DESCRIBE_TRIGGERS = ("what do you see", "was siehst du")


def _describe_contours(image_cv, contours):
    """Describe up to ``_MAX_OBJECTS`` contours that pass the area filter.

    Args:
        image_cv: BGR image array the contours were extracted from.
        contours: Contour sequence as returned by ``cv2.findContours``.

    Returns:
        A list of dicts with the keys ``object``, ``color`` and ``position``.
        ``object`` carries the contour's index in ``contours``.
    """
    objects = []
    for idx, contour in enumerate(contours):
        # Apply the cap to *kept* objects, so small contours at the front of
        # the list cannot use up the budget and hide larger ones behind them.
        if len(objects) >= _MAX_OBJECTS:
            break
        if cv2.contourArea(contour) < _MIN_CONTOUR_AREA:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        roi = image_cv[y:y + h, x:x + w]
        if roi.size == 0:
            continue
        # image_cv is BGR, so the per-channel means are ordered B, G, R.
        mean_color = np.mean(roi, axis=(0, 1)).astype(int)
        color_rgb = f"RGB({mean_color[2]},{mean_color[1]},{mean_color[0]})"
        objects.append({
            "object": f"Object_{idx}",
            "color": color_rgb,
            "position": f"x={x}, y={y}, width={w}, height={h}",
        })
    return objects


def analyze_image(image, prompt):
    """Run the model on an image and optionally describe its contours.

    The image is always passed through the module-level ``image_processor``
    and ``model``. If the prompt contains one of ``_DESCRIBE_TRIGGERS``
    (case-insensitive), the image is additionally thresholded and its
    external contours are reported with bounding box and mean colour.

    Args:
        image: The uploaded image (the interface supplies a PIL image), or
            ``None`` when nothing was uploaded.
        prompt: Free-text prompt; ``None`` is treated as an empty prompt.

    Returns:
        A dict with the keys ``prompt``, ``description`` and
        ``features_shape``. ``description`` is a list of findings, or a
        plain message string when there is nothing to report.
    """
    # Gradio passes None when the form is submitted without an image;
    # answer with the usual result shape instead of failing inside OpenCV.
    if image is None:
        return {
            "prompt": prompt,
            "description": "No image provided.",
            "features_shape": "No features extracted.",
        }

    # Convert the RGB input into OpenCV's BGR channel order.
    image_np = np.array(image)
    image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Feature extraction; inference only, so gradients are disabled.
    inputs = image_processor(images=image_np, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    description = []
    prompt_lower = (prompt or "").lower()
    if any(trigger in prompt_lower for trigger in _DESCRIBE_TRIGGERS):
        # Simple OpenCV analysis: binary threshold, then external contours.
        gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            description.append("Das Bild enthält keine klar erkennbaren Objekte.")
        else:
            description.extend(_describe_contours(image_cv, contours))

    if hasattr(outputs, "last_hidden_state"):
        features_shape = str(outputs.last_hidden_state.shape)
    else:
        features_shape = "No features extracted."

    return {
        "prompt": prompt,
        "description": description if description else "No objects detected.",
        "features_shape": features_shape,
    }
# Build the Gradio interface: one image upload and one text prompt in, JSON out.
_image_input = gr.Image(type="pil", label="Upload an Image")
_prompt_input = gr.Textbox(
    label="Prompt",
    placeholder="Enter your prompt, e.g., 'Was siehst du auf dem Bild?'",
)

iface = gr.Interface(
    fn=analyze_image,
    inputs=[_image_input, _prompt_input],
    outputs="json",
    title="General Image Analysis with DINOv3",
    description="Upload an image and provide a prompt to get a description of what the model sees.",
)

# Start the web server for the interface.
iface.launch()