import gradio as gr
from transformers import AutoModel, AutoImageProcessor
import cv2
import numpy as np
import torch
import os
from huggingface_hub import login
# Authenticate with the Hugging Face API token
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(HF_TOKEN)
else:
    raise ValueError("HF_TOKEN environment variable not set. Please add it in Space settings.")
# Load the model and the image processor
model = AutoModel.from_pretrained("facebook/dinov3-convnext-small-pretrain-lvd1689m")
image_processor = AutoImageProcessor.from_pretrained("facebook/dinov3-convnext-small-pretrain-lvd1689m")
def analyze_image(image, prompt):
    # Convert the PIL image to OpenCV's BGR format; convert("RGB") guards
    # against grayscale or RGBA uploads that would otherwise crash cvtColor
    image_np = np.array(image.convert("RGB"))
    image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Extract features with DINOv3
    inputs = image_processor(images=image_np, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Simple image analysis with OpenCV (contours for objects)
    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Analyze the image based on the prompt (English or German)
    description = []
    if "what do you see" in prompt.lower() or "was siehst du" in prompt.lower():
        if len(contours) == 0:
            description.append("The image contains no clearly recognizable objects.")
        else:
            for idx, contour in enumerate(contours[:10]):  # Limit to 10 objects
                if cv2.contourArea(contour) < 100:  # Ignore small contours
                    continue
                x, y, w, h = cv2.boundingRect(contour)
                # Extract the mean color of the region
                roi = image_cv[y:y+h, x:x+w]
                if roi.size == 0:
                    continue
                mean_color = np.mean(roi, axis=(0, 1)).astype(int)
                # OpenCV stores channels as BGR, so reverse the order for RGB
                color_rgb = f"RGB({mean_color[2]},{mean_color[1]},{mean_color[0]})"
                description.append({
                    "object": f"Object_{idx}",
                    "color": color_rgb,
                    "position": f"x={x}, y={y}, width={w}, height={h}"
                })
    return {
        "prompt": prompt,
        "description": description if description else "No objects detected.",
        "features_shape": str(outputs.last_hidden_state.shape) if hasattr(outputs, 'last_hidden_state') else "No features extracted."
    }
# Build the Gradio interface
iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Prompt", placeholder="Enter your prompt, e.g., 'Was siehst du auf dem Bild?'")
    ],
    outputs="json",
    title="General Image Analysis with DINOv3",
    description="Upload an image and provide a prompt to get a description of what the model sees."
)

iface.launch()