Spaces:
Runtime error
Runtime error
File size: 2,965 Bytes
dd9548e dee9ee3 dd9548e 9b9b214 dd9548e dee9ee3 dd9548e dee9ee3 dd9548e dee9ee3 dd9548e dee9ee3 dd9548e dee9ee3 dd9548e dee9ee3 dd9548e dee9ee3 dd9548e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
from transformers import AutoModel, AutoImageProcessor
import cv2
import numpy as np
import torch
import os
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the API token taken from
# the environment. Startup is aborted when the token is missing or empty.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set. Please add it in Space settings.")
login(HF_TOKEN)
# Load the model and its matching image processor. Both come from the same
# checkpoint, so the identifier is spelled out only once.
_CHECKPOINT = "facebook/dinov3-convnext-small-pretrain-lvd1689m"
model = AutoModel.from_pretrained(_CHECKPOINT)
image_processor = AutoImageProcessor.from_pretrained(_CHECKPOINT)
def _describe_regions(image_cv, contours, max_objects=10, min_area=100):
    """Describe the first ``max_objects`` contours that pass the area filter.

    Args:
        image_cv: BGR image array the contours were extracted from.
        contours: Contours as returned by ``cv2.findContours``.
        max_objects: Maximum number of regions to report.
        min_area: Contours whose ``cv2.contourArea`` is below this are skipped.

    Returns:
        A list of dicts with the keys ``"object"``, ``"color"`` and
        ``"position"``. Objects are numbered consecutively from 0 in the
        order they are reported.
    """
    regions = []
    for contour in contours:
        # The cap counts *reported* regions, so tiny noise contours do not
        # use up the budget before real regions are reached.
        if len(regions) >= max_objects:
            break
        if cv2.contourArea(contour) < min_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        roi = image_cv[y:y + h, x:x + w]
        if roi.size == 0:
            continue
        # Mean over both spatial axes leaves one value per channel (B, G, R).
        mean_color = np.mean(roi, axis=(0, 1)).astype(int)
        # Channels are stored as BGR, so reverse them for the RGB label.
        color_rgb = f"RGB({mean_color[2]},{mean_color[1]},{mean_color[0]})"
        regions.append({
            "object": f"Object_{len(regions)}",
            "color": color_rgb,
            "position": f"x={x}, y={y}, width={w}, height={h}"
        })
    return regions


def analyze_image(image, prompt):
    """Analyse an uploaded image and answer a simple "what do you see" prompt.

    The image is run through the module-level ``model`` /
    ``image_processor`` to obtain features, and through a basic OpenCV
    threshold + contour pass to find candidate regions.

    Args:
        image: The uploaded image (a PIL image when called from the Gradio
            interface), or ``None`` when nothing was uploaded.
        prompt: Free-text prompt. Region descriptions are only produced when
            it contains "what do you see" or "was siehst du"
            (case-insensitive). ``None`` is treated as an empty prompt.

    Returns:
        A JSON-serialisable dict with the keys ``"prompt"``,
        ``"description"`` and ``"features_shape"``.
    """
    if image is None:
        # Submitting without an upload would otherwise crash inside OpenCV.
        return {
            "prompt": prompt,
            "description": "No image provided.",
            "features_shape": "No features extracted."
        }

    # Normalise to 3-channel RGB so greyscale / RGBA / palette images do not
    # break the RGB->BGR conversion below.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    # Convert the PIL image to OpenCV's BGR layout.
    image_np = np.array(image)
    image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Extract features with the pretrained model; no gradients are needed.
    inputs = image_processor(images=image_np, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Simple OpenCV analysis: binarise at mid-grey and take outer contours.
    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Build the description according to the prompt.
    prompt_lower = (prompt or "").lower()
    description = []
    if "what do you see" in prompt_lower or "was siehst du" in prompt_lower:
        if len(contours) == 0:
            description.append("Das Bild enthält keine klar erkennbaren Objekte.")
        else:
            description.extend(_describe_regions(image_cv, contours))

    return {
        "prompt": prompt,
        "description": description if description else "No objects detected.",
        "features_shape": str(outputs.last_hidden_state.shape) if hasattr(outputs, 'last_hidden_state') else "No features extracted."
    }
# Build the Gradio interface: one image upload plus a free-text prompt in,
# the JSON dict returned by analyze_image out.
iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Prompt", placeholder="Enter your prompt, e.g., 'Was siehst du auf dem Bild?'"),
    ],
    outputs="json",
    title="General Image Analysis with DINOv3",
    description="Upload an image and provide a prompt to get a description of what the model sees.",
)
# Start the web server (the stray trailing "|" after this call was a syntax
# error and has been removed).
iface.launch()