import gradio as gr
from transformers import AutoModel, AutoImageProcessor
import cv2
import numpy as np
import torch
import os
from huggingface_hub import login

# Authenticate with the Hugging Face API token (needed to access the model)
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(HF_TOKEN)
else:
    raise ValueError("HF_TOKEN environment variable not set. Please add it in Space settings.")

# Load the DINOv3 model and its image processor
model = AutoModel.from_pretrained("facebook/dinov3-convnext-small-pretrain-lvd1689m")
image_processor = AutoImageProcessor.from_pretrained("facebook/dinov3-convnext-small-pretrain-lvd1689m")

def analyze_image(image, prompt):
    if image is None:
        return {"error": "Please upload an image."}

    # Convert the PIL image to OpenCV's BGR format (force 3-channel RGB first)
    image_np = np.array(image.convert("RGB"))
    image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Extract features with DINOv3 (only the feature shape is reported below)
    inputs = image_processor(images=image_np, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
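    # A pooled feature vector could be taken here for similarity search or
    # classification (assumption: the backbone exposes `pooler_output` like
    # other transformers ConvNeXt models); this demo only reports the shape.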

    # Simple image analysis with OpenCV: binary threshold, then external
    # contours as rough object proposals
    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
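    # Note: the fixed threshold of 127 assumes reasonably even lighting. An
    # alternative (sketch) is Otsu's method, which picks the threshold
    # automatically:
    #   _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)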
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Analyze the image based on the prompt (English and German trigger phrases)
    description = []
    if "what do you see" in prompt.lower() or "was siehst du" in prompt.lower():
        if len(contours) == 0:
            description.append("Das Bild enthält keine klar erkennbaren Objekte.")
        else:
            for idx, contour in enumerate(contours[:10]):  # report at most 10 contours
                if cv2.contourArea(contour) < 100:  # skip small contours (likely noise)
                    continue
                x, y, w, h = cv2.boundingRect(contour)
                # Compute the mean color of the region (OpenCV uses BGR channel order)
                roi = image_cv[y:y+h, x:x+w]
                if roi.size == 0:
                    continue
                mean_color = np.mean(roi, axis=(0, 1)).astype(int)
                color_rgb = f"RGB({mean_color[2]},{mean_color[1]},{mean_color[0]})"
                description.append({
                    "object": f"Object_{idx}",
                    "color": color_rgb,
                    "position": f"x={x}, y={y}, width={w}, height={h}"
                })

    return {
        "prompt": prompt,
        "description": description if description else "No objects detected.",
        "features_shape": str(outputs.last_hidden_state.shape) if hasattr(outputs, 'last_hidden_state') else "No features extracted."
    }

# Build the Gradio interface
iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Prompt", placeholder="Enter your prompt, e.g., 'What do you see?' or 'Was siehst du auf dem Bild?'")
    ],
    outputs="json",
    title="General Image Analysis with DINOv3",
    description="Upload an image and provide a prompt to get a description of what the model sees."
)

iface.launch()
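
# Example client call (sketch; assumes the app is running on the default local
# port and `gradio_client` is installed — the file name "example.jpg" is
# hypothetical):
#
#   from gradio_client import Client, handle_file
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(
#       handle_file("example.jpg"),     # image input
#       "What do you see?",             # prompt input
#       api_name="/predict",
#   )
#   print(result)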