Spaces:

doinglean
/

dinov3-candlestick-api

Runtime error

App Files Files Community

dinov3-candlestick-api / app.py

doinglean

Update app.py

dee9ee3 verified 3 months ago

raw

history blame

2.97 kB

	import gradio as gr
	from transformers import AutoModel, AutoImageProcessor
	import cv2
	import numpy as np
	import torch
	import os
	from huggingface_hub import login

	# Authenticate with the Hugging Face Hub using the API token from the environment.
	HF_TOKEN = os.getenv("HF_TOKEN")
	if not HF_TOKEN:
	    # Abort startup with an actionable message when the token is missing or empty.
	    raise ValueError("HF_TOKEN environment variable not set. Please add it in Space settings.")
	login(HF_TOKEN)

	# Load the model and its image processor from the same checkpoint id.
	_CHECKPOINT = "facebook/dinov3-convnext-small-pretrain-lvd1689m"
	model = AutoModel.from_pretrained(_CHECKPOINT)
	image_processor = AutoImageProcessor.from_pretrained(_CHECKPOINT)

	def analyze_image(image, prompt):
	    """Describe an uploaded image using OpenCV contours plus a DINOv3 forward pass.

	    Args:
	        image: The uploaded picture. The Gradio input is declared with
	            ``type="pil"``, so this is expected to be a PIL image, or
	            ``None`` when the form is submitted without an upload.
	        prompt: Free-text prompt. Objects are only listed when it contains
	            "what do you see" or "was siehst du" (case-insensitive).
	            ``None`` is treated like an empty prompt.

	    Returns:
	        A dict with three keys:
	        ``"prompt"`` (the prompt as received),
	        ``"description"`` (a list of per-object dicts with ``object``,
	        ``color`` and ``position``; a one-element list with a message when
	        no contours exist; or the string ``"No objects detected."`` when
	        nothing was collected), and
	        ``"features_shape"`` (the shape of ``outputs.last_hidden_state`` as
	        a string, or a fallback message when that attribute is absent).

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if image is None:
	        # Surface a readable message in the UI instead of an opaque
	        # NumPy/OpenCV failure further down.
	        raise gr.Error("No image provided. Please upload an image.")

	    # Force 3-channel RGB so the RGB->BGR conversion below is valid even for
	    # grayscale or palette images.
	    if hasattr(image, "convert"):
	        image = image.convert("RGB")

	    # Convert the PIL image to OpenCV's BGR layout.
	    image_np = np.array(image)
	    image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

	    # Run the DINOv3 model; no gradients are needed for inference.
	    inputs = image_processor(images=image_np, return_tensors="pt")
	    with torch.no_grad():
	        outputs = model(**inputs)

	    # Simple OpenCV analysis: binarise at mid-gray and take outer contours.
	    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
	    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
	    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	    max_objects = 10  # upper bound on reported objects
	    min_area = 100  # contours with a smaller area are ignored

	    prompt_text = (prompt or "").lower()
	    description = []
	    if "what do you see" in prompt_text or "was siehst du" in prompt_text:
	        if not contours:
	            description.append("Das Bild enthält keine klar erkennbaren Objekte.")
	        else:
	            # Walk all contours and stop once enough objects were reported,
	            # so small contours at the front of the list cannot crowd out
	            # larger ones that come later.
	            for idx, contour in enumerate(contours):
	                if len(description) >= max_objects:
	                    break
	                if cv2.contourArea(contour) < min_area:
	                    continue
	                x, y, w, h = cv2.boundingRect(contour)
	                # Mean colour of the bounding-box region.
	                roi = image_cv[y:y + h, x:x + w]
	                if roi.size == 0:
	                    continue
	                # image_cv is BGR, so the channel means unpack in that order.
	                blue, green, red = np.mean(roi, axis=(0, 1)).astype(int)
	                description.append({
	                    "object": f"Object_{idx}",
	                    "color": f"RGB({red},{green},{blue})",
	                    "position": f"x={x}, y={y}, width={w}, height={h}",
	                })

	    if hasattr(outputs, "last_hidden_state"):
	        features_shape = str(outputs.last_hidden_state.shape)
	    else:
	        features_shape = "No features extracted."

	    return {
	        "prompt": prompt,
	        "description": description if description else "No objects detected.",
	        "features_shape": features_shape,
	    }

	# Build the Gradio interface: one image upload and one text prompt in, JSON out.
	_image_input = gr.Image(type="pil", label="Upload an Image")
	_prompt_input = gr.Textbox(
	    label="Prompt",
	    placeholder="Enter your prompt, e.g., 'Was siehst du auf dem Bild?'",
	)
	iface = gr.Interface(
	    fn=analyze_image,
	    inputs=[_image_input, _prompt_input],
	    outputs="json",
	    title="General Image Analysis with DINOv3",
	    description="Upload an image and provide a prompt to get a description of what the model sees.",
	)

	# Start the web app.
	iface.launch()