Spaces:

DawnC
/

Pixcribe

Sleeping

File size: 7,475 Bytes

6a3bd1f


import torch
from PIL import Image
from typing import Dict, List, Tuple
import numpy as np

class BrandDetectionOptimizer:
    """
    智能品牌檢測優化器 - 性能與準確度平衡
    通過快速預篩選減少不必要的深度檢測
    """

    def __init__(self, clip_manager, ocr_manager, prompt_library):
        self.clip_manager = clip_manager
        self.ocr_manager = ocr_manager
        self.prompt_library = prompt_library

    def quick_brand_prescreening(self, image: Image.Image) -> List[str]:
        """
        快速品牌預篩選 - 只檢測最可能的品牌類別
        大幅減少需要深度檢測的品牌數量

        Returns:
            List of brand names that are likely present
        """
        # Step 1: OCR 快速掃描（最快且最準確的方法）
        likely_brands = set()

        ocr_results = self.ocr_manager.extract_text(image, use_brand_preprocessing=True)

        for ocr_item in ocr_results:
            text = ocr_item['text'].upper()

            # 過濾太短的文字（避免誤匹配）
            if len(text) < 2:
                continue

            # 檢查所有品牌的別名
            for brand_name, brand_info in self.prompt_library.get_all_brands().items():
                aliases = [alias.upper() for alias in brand_info.get('aliases', [])]

                # 完全匹配或部分匹配（但要求較高相似度）
                for alias in aliases:
                    # 完全匹配
                    if alias == text:
                        likely_brands.add(brand_name)
                        break
                    # 部分匹配：要求別名長度 >= 3 且匹配度高
                    elif len(alias) >= 3:
                        if alias in text and len(alias) / len(text) > 0.6:
                            likely_brands.add(brand_name)
                            break
                        elif text in alias and len(text) / len(alias) > 0.6:
                            likely_brands.add(brand_name)
                            break

        # Step 2: 視覺特徵快速分類（使用通用品牌類別）
        category_prompts = {
            'luxury': 'luxury brand product with monogram pattern and leather details',
            'sportswear': 'sportswear brand product with athletic logo and swoosh design',
            'tech': 'technology brand product with minimalist design and metal finish',
            'automotive': 'luxury car brand with distinctive grille and emblem',
            'watches': 'luxury watch with distinctive dial and brand logo',
            'fashion': 'fashion brand product with signature pattern or logo'
        }

        category_scores = self.clip_manager.classify_zero_shot(
            image, list(category_prompts.values())
        )

        # 獲取最可能的類別（top 2）
        sorted_categories = sorted(
            category_scores.items(), key=lambda x: x[1], reverse=True
        )[:2]

        # 將類別映射回品牌
        category_mapping = {v: k for k, v in category_prompts.items()}

        for prompt_text, score in sorted_categories:
            if score > 0.30:  # 提高閾值，減少誤判（0.15 → 0.30）
                category = category_mapping[prompt_text]
                # 添加該類別的所有品牌
                category_brands = self.prompt_library.get_brands_by_category(category)
                likely_brands.update(category_brands.keys())

        # Step 3: 如果完全沒有線索，只添加視覺特徵最明顯的 3 個品牌（保底）
        # 注意：這不是硬編碼，而是在無任何線索時的合理默認值
        if not likely_brands:
            # 只添加視覺特徵極其明顯且常見的品牌
            default_brands = ['Louis Vuitton', 'Gucci', 'Nike']
            likely_brands.update(default_brands)

        # 返回品牌列表（不限制數量，而是依賴質量過濾）
        return list(likely_brands)

    def smart_region_selection(self, image: Image.Image,
                                 saliency_regions: List[Dict]) -> List[Tuple[int, int, int, int]]:
        """
        智能區域選擇 - 只掃描有品牌可能性的區域
        替代低效的網格掃描

        Args:
            image: PIL Image
            saliency_regions: Saliency detection results

        Returns:
            List of bboxes (x1, y1, x2, y2) to scan
        """
        regions_to_scan = []
        img_width, img_height = image.size

        # Strategy 1: 使用顯著性區域（最有可能包含品牌）
        if saliency_regions:
            for region in saliency_regions[:3]:  # Top 3 salient regions
                bbox = region.get('bbox')
                if bbox:
                    # 擴展區域以包含周邊context
                    x1, y1, x2, y2 = bbox
                    padding = 20
                    x1 = max(0, x1 - padding)
                    y1 = max(0, y1 - padding)
                    x2 = min(img_width, x2 + padding)
                    y2 = min(img_height, y2 + padding)

                    # 確保區域夠大
                    if (x2 - x1) > 100 and (y2 - y1) > 100:
                        regions_to_scan.append((x1, y1, x2, y2))

        # Strategy 2: 中心區域（品牌通常在中心）
        center_x = img_width // 2
        center_y = img_height // 2
        center_size = min(img_width, img_height) // 2

        center_bbox = (
            max(0, center_x - center_size // 2),
            max(0, center_y - center_size // 2),
            min(img_width, center_x + center_size // 2),
            min(img_height, center_y + center_size // 2)
        )
        regions_to_scan.append(center_bbox)

        # Strategy 3: 如果沒有顯著區域，使用全圖
        if not regions_to_scan:
            regions_to_scan.append((0, 0, img_width, img_height))

        return regions_to_scan

    def compute_brand_confidence_boost(self, brand_name: str,
                                        ocr_results: List[Dict],
                                        base_confidence: float) -> float:
        """
        基於 OCR 結果提升品牌信心度
        如果 OCR 檢測到品牌名稱，大幅提升信心度

        Args:
            brand_name: Brand name
            ocr_results: OCR detection results
            base_confidence: Base confidence from visual matching

        Returns:
            Boosted confidence score
        """
        brand_info = self.prompt_library.get_brand_prompts(brand_name)
        if not brand_info:
            return base_confidence

        aliases = [alias.upper() for alias in brand_info.get('aliases', [])]

        max_boost = 0.0
        for ocr_item in ocr_results:
            text = ocr_item['text'].upper()
            ocr_conf = ocr_item['confidence']

            for alias in aliases:
                # 完全匹配
                if alias == text:
                    max_boost = max(max_boost, 0.40 * ocr_conf)  # 最高提升 0.40
                # 部分匹配
                elif alias in text or text in alias:
                    if len(alias) > 2:  # 避免短字符串誤匹配
                        max_boost = max(max_boost, 0.25 * ocr_conf)

        # 應用提升，但不超過 0.95
        boosted_confidence = min(base_confidence + max_boost, 0.95)
        return boosted_confidence

print("✓ BrandDetectionOptimizer (performance and accuracy optimizer) defined")