Spaces:

DawnC
/

Pixcribe

Sleeping

App Files Files Community

Pixcribe / brand_detection_optimizer.py

DawnC

Upload 22 files

6a3bd1f verified about 1 month ago

raw

history blame contribute delete

7.48 kB


	import torch
	from PIL import Image
	from typing import Dict, List, Tuple
	import numpy as np

	class BrandDetectionOptimizer:
	"""
	智能品牌檢測優化器 - 性能與準確度平衡
	通過快速預篩選減少不必要的深度檢測
	"""

	def __init__(self, clip_manager, ocr_manager, prompt_library):
	self.clip_manager = clip_manager
	self.ocr_manager = ocr_manager
	self.prompt_library = prompt_library

	def quick_brand_prescreening(self, image: Image.Image) -> List[str]:
	"""
	快速品牌預篩選 - 只檢測最可能的品牌類別
	大幅減少需要深度檢測的品牌數量

	Returns:
	List of brand names that are likely present
	"""
	# Step 1: OCR 快速掃描（最快且最準確的方法）
	likely_brands = set()

	ocr_results = self.ocr_manager.extract_text(image, use_brand_preprocessing=True)

	for ocr_item in ocr_results:
	text = ocr_item['text'].upper()

	# 過濾太短的文字（避免誤匹配）
	if len(text) < 2:
	continue

	# 檢查所有品牌的別名
	for brand_name, brand_info in self.prompt_library.get_all_brands().items():
	aliases = [alias.upper() for alias in brand_info.get('aliases', [])]

	# 完全匹配或部分匹配（但要求較高相似度）
	for alias in aliases:
	# 完全匹配
	if alias == text:
	likely_brands.add(brand_name)
	break
	# 部分匹配：要求別名長度 >= 3 且匹配度高
	elif len(alias) >= 3:
	if alias in text and len(alias) / len(text) > 0.6:
	likely_brands.add(brand_name)
	break
	elif text in alias and len(text) / len(alias) > 0.6:
	likely_brands.add(brand_name)
	break

	# Step 2: 視覺特徵快速分類（使用通用品牌類別）
	category_prompts = {
	'luxury': 'luxury brand product with monogram pattern and leather details',
	'sportswear': 'sportswear brand product with athletic logo and swoosh design',
	'tech': 'technology brand product with minimalist design and metal finish',
	'automotive': 'luxury car brand with distinctive grille and emblem',
	'watches': 'luxury watch with distinctive dial and brand logo',
	'fashion': 'fashion brand product with signature pattern or logo'
	}

	category_scores = self.clip_manager.classify_zero_shot(
	image, list(category_prompts.values())
	)

	# 獲取最可能的類別（top 2）
	sorted_categories = sorted(
	category_scores.items(), key=lambda x: x[1], reverse=True
	)[:2]

	# 將類別映射回品牌
	category_mapping = {v: k for k, v in category_prompts.items()}

	for prompt_text, score in sorted_categories:
	if score > 0.30: # 提高閾值，減少誤判（0.15 → 0.30）
	category = category_mapping[prompt_text]
	# 添加該類別的所有品牌
	category_brands = self.prompt_library.get_brands_by_category(category)
	likely_brands.update(category_brands.keys())

	# Step 3: 如果完全沒有線索，只添加視覺特徵最明顯的 3 個品牌（保底）
	# 注意：這不是硬編碼，而是在無任何線索時的合理默認值
	if not likely_brands:
	# 只添加視覺特徵極其明顯且常見的品牌
	default_brands = ['Louis Vuitton', 'Gucci', 'Nike']
	likely_brands.update(default_brands)

	# 返回品牌列表（不限制數量，而是依賴質量過濾）
	return list(likely_brands)

	def smart_region_selection(self, image: Image.Image,
	saliency_regions: List[Dict]) -> List[Tuple[int, int, int, int]]:
	"""
	智能區域選擇 - 只掃描有品牌可能性的區域
	替代低效的網格掃描

	Args:
	image: PIL Image
	saliency_regions: Saliency detection results

	Returns:
	List of bboxes (x1, y1, x2, y2) to scan
	"""
	regions_to_scan = []
	img_width, img_height = image.size

	# Strategy 1: 使用顯著性區域（最有可能包含品牌）
	if saliency_regions:
	for region in saliency_regions[:3]: # Top 3 salient regions
	bbox = region.get('bbox')
	if bbox:
	# 擴展區域以包含周邊context
	x1, y1, x2, y2 = bbox
	padding = 20
	x1 = max(0, x1 - padding)
	y1 = max(0, y1 - padding)
	x2 = min(img_width, x2 + padding)
	y2 = min(img_height, y2 + padding)

	# 確保區域夠大
	if (x2 - x1) > 100 and (y2 - y1) > 100:
	regions_to_scan.append((x1, y1, x2, y2))

	# Strategy 2: 中心區域（品牌通常在中心）
	center_x = img_width // 2
	center_y = img_height // 2
	center_size = min(img_width, img_height) // 2

	center_bbox = (
	max(0, center_x - center_size // 2),
	max(0, center_y - center_size // 2),
	min(img_width, center_x + center_size // 2),
	min(img_height, center_y + center_size // 2)
	)
	regions_to_scan.append(center_bbox)

	# Strategy 3: 如果沒有顯著區域，使用全圖
	if not regions_to_scan:
	regions_to_scan.append((0, 0, img_width, img_height))

	return regions_to_scan

	def compute_brand_confidence_boost(self, brand_name: str,
	ocr_results: List[Dict],
	base_confidence: float) -> float:
	"""
	基於 OCR 結果提升品牌信心度
	如果 OCR 檢測到品牌名稱，大幅提升信心度

	Args:
	brand_name: Brand name
	ocr_results: OCR detection results
	base_confidence: Base confidence from visual matching

	Returns:
	Boosted confidence score
	"""
	brand_info = self.prompt_library.get_brand_prompts(brand_name)
	if not brand_info:
	return base_confidence

	aliases = [alias.upper() for alias in brand_info.get('aliases', [])]

	max_boost = 0.0
	for ocr_item in ocr_results:
	text = ocr_item['text'].upper()
	ocr_conf = ocr_item['confidence']

	for alias in aliases:
	# 完全匹配
	if alias == text:
	max_boost = max(max_boost, 0.40 * ocr_conf) # 最高提升 0.40
	# 部分匹配
	elif alias in text or text in alias:
	if len(alias) > 2: # 避免短字符串誤匹配
	max_boost = max(max_boost, 0.25 * ocr_conf)

	# 應用提升，但不超過 0.95
	boosted_confidence = min(base_confidence + max_boost, 0.95)
	return boosted_confidence

	print("✓ BrandDetectionOptimizer (performance and accuracy optimizer) defined")