# Hugging Face Space: DawnC / Pixcribe (status: Sleeping)
# File size: 3,516 Bytes
# Revision: 6a3bd1f


import torch
import numpy as np
from PIL import Image
import cv2
from typing import List, Dict
import torchvision.transforms as transforms

class SaliencyDetectionManager:
    """Find prominent image regions and filter out those already detected.

    Region detection (``detect_salient_regions``) is purely classical: Otsu
    thresholding followed by external-contour extraction. It does not run
    ``self.model``.

    NOTE(review): the log message and earlier documentation mention U2-Net,
    but the network actually constructed in ``__init__`` is torchvision's
    ``deeplabv3_resnet50``. No method of this class calls it.
    """

    def __init__(self):
        """Load the segmentation network (best effort) and set defaults.

        Any failure while importing or constructing the network is reported
        on stdout and leaves ``self.model`` set to ``None`` instead of
        raising, so the contour-based detection remains usable.
        """
        print("Loading U2-Net model...")
        try:
            # Imported lazily so a missing/broken torchvision only disables
            # the model instead of breaking the import of this module.
            from torchvision.models.segmentation import deeplabv3_resnet50
            # NOTE(review): newer torchvision releases prefer ``weights=...``
            # over ``pretrained=True``; kept unchanged for older installs.
            self.model = deeplabv3_resnet50(pretrained=True)
            self.model.eval()
            if torch.cuda.is_available():
                self.model = self.model.cuda()
        except Exception as e:
            print(f"Warning: Cannot load deep learning model, using fallback: {e}")
            self.model = None

        # ``threshold`` and ``min_saliency`` are not read by any method of
        # this class; they are kept because external code may access them.
        self.threshold = 0.5
        # Contours whose area (in pixels) is below this are discarded.
        self.min_area = 1600
        self.min_saliency = 0.6

        # Preprocessing pipeline (resize to 320x320, tensor conversion,
        # per-channel normalisation). Not applied by any method of this class.
        self.transform = transforms.Compose([
            transforms.Resize((320, 320)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        print("✓ SaliencyDetectionManager initialized")

    def detect_salient_regions(self, image: "Image.Image", max_regions: int = 10) -> List[Dict]:
        """Detect candidate salient regions via Otsu thresholding and contours.

        Args:
            image: Input PIL image. Non-RGB modes (grayscale, palette, ...)
                are converted to RGB for analysis; the returned crops are
                still taken from the original ``image``.
            max_regions: Maximum number of regions to return (default 10).
                Values below zero are treated as zero.

        Returns:
            A list of dicts sorted by descending ``'saliency_score'``, each
            with keys ``'bbox'`` (``[x_min, y_min, x_max, y_max]`` floats),
            ``'area'`` (contour area), ``'saliency_score'`` (contour area as
            a fraction of the image area, capped at 1.0) and ``'image'``
            (the cropped PIL image). Empty for a zero-sized image.
        """
        # cv2.COLOR_RGB2GRAY needs a 3- or 4-channel array; grayscale or
        # palette images would otherwise produce a 2-D array and fail.
        analysis_image = image if image.mode == "RGB" else image.convert("RGB")
        img_array = np.array(analysis_image)

        height, width = img_array.shape[:2]
        if height == 0 or width == 0:
            return []

        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x
        # return (contours, hierarchy); index -2 is the contours in both.
        contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        image_area = float(width * height)
        regions = []

        for contour in contours:
            area = cv2.contourArea(contour)
            if area < self.min_area:
                continue

            x, y, w, h = cv2.boundingRect(contour)
            regions.append({
                'bbox': [float(x), float(y), float(x + w), float(y + h)],
                'area': area,
                'saliency_score': min(area / image_area, 1.0),
                'image': image.crop((x, y, x + w, y + h)),
            })

        regions.sort(key=lambda region: region['saliency_score'], reverse=True)
        return regions[:max(max_regions, 0)]

    def extract_unknown_regions(
        self,
        salient_regions: List[Dict],
        yolo_detections: List[Dict],
        iou_threshold: float = 0.3,
    ) -> List[Dict]:
        """Return the salient regions that no detection overlaps sufficiently.

        Args:
            salient_regions: Region dicts, each providing a ``'bbox'`` entry.
            yolo_detections: Detection dicts, each providing a ``'bbox'``
                entry. ``None`` is treated as "no detections".
            iou_threshold: A region is kept when its best IoU against every
                detection is strictly below this value (default 0.3).

        Returns:
            The subset of ``salient_regions`` (same dict objects, original
            order) considered not covered by any detection.
        """
        detections = yolo_detections or []
        unknown_regions = []

        for region in salient_regions:
            best_iou = max(
                (self._calculate_iou(region['bbox'], det['bbox']) for det in detections),
                default=0.0,
            )
            if best_iou < iou_threshold:
                unknown_regions.append(region)

        return unknown_regions

    def _calculate_iou(self, box1: List[float], box2: List[float]) -> float:
        """Calculate IoU (Intersection over Union) of two boxes.

        Args:
            box1: ``[x_min, y_min, x_max, y_max]``.
            box2: ``[x_min, y_min, x_max, y_max]``.

        Returns:
            Intersection area divided by union area; ``0.0`` when the boxes
            do not overlap (including boxes that merely touch) or when the
            union area is not positive.
        """
        x1_min, y1_min, x1_max, y1_max = box1
        x2_min, y2_min, x2_max, y2_max = box2

        inter_w = min(x1_max, x2_max) - max(x1_min, x2_min)
        inter_h = min(y1_max, y2_max) - max(y1_min, y2_min)
        if inter_w <= 0 or inter_h <= 0:
            return 0.0

        inter_area = inter_w * inter_h
        box1_area = (x1_max - x1_min) * (y1_max - y1_min)
        box2_area = (x2_max - x2_min) * (y2_max - y2_min)
        union_area = box1_area + box2_area - inter_area

        return inter_area / union_area if union_area > 0 else 0.0

# Module-level statement: runs once when this file is executed or imported,
# after the class body above has been evaluated.
print("✓ SaliencyDetectionManager defined")