ColamanAI committed
Commit 3e7f3f4 · verified · 1 Parent(s): 5fcf7e1

Upload app.py

Files changed (1)
  1. app.py +334 -47
app.py CHANGED
@@ -89,14 +89,26 @@ high_level_config = {
     "resolution": 518,
 }
 
-# GroundingDINO and SAM Configuration (CPU-friendly versions)
-GROUNDING_DINO_MODEL_ID = "IDEA-Research/grounding-dino-tiny"  # already the tiny variant
+# ============ Segmentation model configuration ============
+# Method options:
+#   1. "segformer"      - SegFormer (lightest, ~14MB, fastest)
+#   2. "maskformer"     - MaskFormer (medium, ~100MB, instance segmentation)
+#   3. "grounding_sam"  - GroundingDINO + SAM (strongest, ~110MB, text-prompt driven)
+
+SEGMENTATION_METHOD = "segformer"  # default to the lightest option
+
+# SegFormer configuration (recommended - CPU friendly)
+SEGFORMER_MODEL_ID = "nvidia/segformer-b0-finetuned-ade-512-512"  # ~14MB, 150 object classes
+
+# MaskFormer configuration (alternative)
+MASKFORMER_MODEL_ID = "facebook/maskformer-swin-tiny-ade"  # ~100MB, instance segmentation
+
+# GroundingDINO + SAM configuration (original approach - requires a text prompt)
+GROUNDING_DINO_MODEL_ID = "IDEA-Research/grounding-dino-tiny"
 GROUNDING_DINO_BOX_THRESHOLD = 0.25
 GROUNDING_DINO_TEXT_THRESHOLD = 0.2
-
-# Use MobileSAM (CPU friendly, ~60x faster than SAM-huge, only ~10MB)
-SAM_MODEL_ID = "dhkim2810/MobileSAM"  # lightweight SAM, suitable for CPU
-USE_MOBILE_SAM = True  # flag that MobileSAM is in use
+SAM_MODEL_ID = "dhkim2810/MobileSAM"
+USE_MOBILE_SAM = True
 
 DEFAULT_TEXT_PROMPT = "chair . table . sofa . bed . desk . cabinet"
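A quick way to sanity-check the new default checkpoint outside the app: the minimal sketch below loads nvidia/segformer-b0-finetuned-ade-512-512 the same way load_segformer_model does and prints the shape of the per-pixel class map. The dummy input image is a placeholder assumption; any RGB image works.

# Standalone sanity check for the default SEGFORMER_MODEL_ID (not part of app.py).
import numpy as np
import torch
from PIL import Image
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

model_id = "nvidia/segformer-b0-finetuned-ade-512-512"
processor = SegformerImageProcessor.from_pretrained(model_id)
model = SegformerForSemanticSegmentation.from_pretrained(model_id).eval()

image = Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))  # dummy RGB image
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits            # (1, 150, H/4, W/4)
class_map = logits.argmax(dim=1)[0].numpy()    # per-pixel ADE20K class ids
print(class_map.shape, np.unique(class_map))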
 
@@ -151,11 +163,95 @@ grounding_dino_model = None
 grounding_dino_processor = None
 sam_predictor = None
 
+# SegFormer model (lightweight semantic segmentation)
+segformer_processor = None
+segformer_model = None
+
+# MaskFormer model (instance segmentation)
+maskformer_processor = None
+maskformer_model = None
+
 
 # ============================================================================
 # Model Loading Functions
 # ============================================================================
 
+def load_segformer_model(device="cpu"):
+    """Load the SegFormer model (lightest option, CPU friendly)."""
+    global segformer_processor, segformer_model
+
+    if segformer_model is not None:
+        print("✅ SegFormer already loaded")
+        return
+
+    try:
+        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
+        import os
+
+        print(f"📥 Loading SegFormer from HuggingFace: {SEGFORMER_MODEL_ID}")
+        print(f"   💡 SegFormer-B0: ~14MB, 150 object classes, CPU-optimized")
+
+        cache_dir = os.getenv("HF_HOME", "./hf_cache")
+
+        print(f"   Downloading processor...")
+        segformer_processor = SegformerImageProcessor.from_pretrained(
+            SEGFORMER_MODEL_ID,
+            cache_dir=cache_dir
+        )
+
+        print(f"   Downloading model...")
+        segformer_model = SegformerForSemanticSegmentation.from_pretrained(
+            SEGFORMER_MODEL_ID,
+            cache_dir=cache_dir,
+            low_cpu_mem_usage=True
+        ).to(device).eval()
+
+        print(f"✅ SegFormer loaded successfully on {device.upper()}")
+        print(f"   Recognized classes: people, furniture, walls, floors, etc. (150 ADE20K classes)")
+
+    except Exception as e:
+        print(f"❌ SegFormer loading failed: {type(e).__name__}: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+def load_maskformer_model(device="cpu"):
+    """Load the MaskFormer model (instance segmentation)."""
+    global maskformer_processor, maskformer_model
+
+    if maskformer_model is not None:
+        print("✅ MaskFormer already loaded")
+        return
+
+    try:
+        from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
+        import os
+
+        print(f"📥 Loading MaskFormer from HuggingFace: {MASKFORMER_MODEL_ID}")
+        print(f"   💡 MaskFormer: ~100MB, instance segmentation")
+
+        cache_dir = os.getenv("HF_HOME", "./hf_cache")
+
+        print(f"   Downloading processor...")
+        maskformer_processor = MaskFormerImageProcessor.from_pretrained(
+            MASKFORMER_MODEL_ID,
+            cache_dir=cache_dir
+        )
+
+        print(f"   Downloading model...")
+        maskformer_model = MaskFormerForInstanceSegmentation.from_pretrained(
+            MASKFORMER_MODEL_ID,
+            cache_dir=cache_dir,
+            low_cpu_mem_usage=True
+        ).to(device).eval()
+
+        print(f"✅ MaskFormer loaded successfully on {device.upper()}")
+
+    except Exception as e:
+        print(f"❌ MaskFormer loading failed: {type(e).__name__}: {e}")
+        import traceback
+        traceback.print_exc()
+
 def load_grounding_dino_model(device="cpu"):
     """Load GroundingDINO model from HuggingFace (CPU-optimized)"""
     global grounding_dino_model, grounding_dino_processor
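Both new loaders resolve their download location via os.getenv("HF_HOME", "./hf_cache"), the same pattern added to the GroundingDINO and SAM loaders below. On a Space with persistent storage you could point that variable at the mounted volume before the first load; the /data path in this sketch is an assumption, not something this commit sets.

import os

# Hypothetical override: keep downloaded checkpoints across restarts.
# "/data/hf_cache" is an assumed persistent-storage path, not part of this commit.
os.environ.setdefault("HF_HOME", "/data/hf_cache")

load_segformer_model("cpu")   # downloads into the cache directory on first call
load_segformer_model("cpu")   # second call is a no-op thanks to the module-level guard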
@@ -166,19 +262,49 @@ def load_grounding_dino_model(device="cpu"):
 
     try:
         from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
+        import os
 
         # Force CPU for segmentation (saves GPU resources)
        seg_device = "cpu"
         print(f"📥 Loading GroundingDINO from HuggingFace: {GROUNDING_DINO_MODEL_ID} (using {seg_device.upper()})")
-        grounding_dino_processor = AutoProcessor.from_pretrained(GROUNDING_DINO_MODEL_ID)
+
+        # Set the cache directory (HuggingFace Spaces friendly)
+        cache_dir = os.getenv("HF_HOME", "./hf_cache")
+
+        # Load the model (with detailed logging)
+        print(f"   Downloading processor...")
+        grounding_dino_processor = AutoProcessor.from_pretrained(
+            GROUNDING_DINO_MODEL_ID,
+            cache_dir=cache_dir,
+            trust_remote_code=True  # allow remote code execution
+        )
+
+        print(f"   Downloading model...")
         grounding_dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(
-            GROUNDING_DINO_MODEL_ID
+            GROUNDING_DINO_MODEL_ID,
+            cache_dir=cache_dir,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True  # lower CPU memory usage
         ).to(seg_device).eval()
 
         print(f"✅ GroundingDINO loaded successfully on {seg_device.upper()}")
 
+    except ImportError as e:
+        print(f"❌ ImportError: {e}")
+        print(f"💡 Check that requirements.txt includes the transformers library")
+        import traceback
+        traceback.print_exc()
+    except OSError as e:
+        print(f"❌ OSError (network/file issue): {e}")
+        print(f"💡 Possible network problem, or the model repo is unreachable")
+        print(f"💡 Suggested fixes:")
+        print(f"   1. Check the HuggingFace Spaces network connection")
+        print(f"   2. Check that the model ID is correct: {GROUNDING_DINO_MODEL_ID}")
+        print(f"   3. Make sure there is enough disk space")
+        import traceback
+        traceback.print_exc()
     except Exception as e:
-        print(f"❌ GroundingDINO loading failed: {e}")
+        print(f"❌ GroundingDINO loading failed: {type(e).__name__}: {e}")
         import traceback
         traceback.print_exc()
 
@@ -193,14 +319,28 @@ def load_sam_model(device="cpu"):
 
     try:
         from transformers import SamModel, SamProcessor
+        import os
 
         # Force CPU for segmentation (MobileSAM is optimized for mobile devices/CPU)
         seg_device = "cpu"
         print(f"📥 Loading MobileSAM from HuggingFace: {SAM_MODEL_ID} (using {seg_device.upper()})")
         print(f"   💡 MobileSAM is the lightweight variant: ~60x faster than SAM-huge, only ~10MB, well suited to CPU")
 
-        sam_model = SamModel.from_pretrained(SAM_MODEL_ID).to(seg_device).eval()
-        sam_processor = SamProcessor.from_pretrained(SAM_MODEL_ID)
+        # Set the cache directory
+        cache_dir = os.getenv("HF_HOME", "./hf_cache")
+
+        print(f"   Downloading processor...")
+        sam_processor = SamProcessor.from_pretrained(
+            SAM_MODEL_ID,
+            cache_dir=cache_dir
+        )
+
+        print(f"   Downloading model...")
+        sam_model = SamModel.from_pretrained(
+            SAM_MODEL_ID,
+            cache_dir=cache_dir,
+            low_cpu_mem_usage=True
+        ).to(seg_device).eval()
 
         # Wrap in a predictor-like interface
         class SAMPredictor:
@@ -244,9 +384,22 @@ def load_sam_model(device="cpu"):
         sam_predictor = SAMPredictor(sam_model, sam_processor, seg_device)
         print(f"✅ MobileSAM loaded successfully on {seg_device.upper()}")
 
+    except ImportError as e:
+        print(f"❌ ImportError: {e}")
+        print(f"💡 Check that requirements.txt includes the transformers library")
+        import traceback
+        traceback.print_exc()
+    except OSError as e:
+        print(f"❌ OSError (network/file issue): {e}")
+        print(f"💡 Possible network problem, or the model repo is unreachable")
+        print(f"💡 Suggested fixes:")
+        print(f"   1. Check the HuggingFace Spaces network connection")
+        print(f"   2. Check that the model ID is correct: {SAM_MODEL_ID}")
+        print(f"   3. Make sure there is enough disk space")
+        import traceback
+        traceback.print_exc()
     except Exception as e:
-        print(f"❌ SAM loading failed: {e}")
-        print("   Falling back to bbox-based masks")
+        print(f"❌ SAM loading failed: {type(e).__name__}: {e}")
         import traceback
         traceback.print_exc()
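For context, the diff only changes how MobileSAM is loaded and cached, not how it is prompted; the refinement itself happens in run_sam_refinement, which is unchanged here. A hedged sketch of box-prompted inference with the SamModel/SamProcessor objects created above (the image and box values are placeholders):

# Sketch of box-prompted mask prediction with the transformers SAM classes.
import numpy as np
import torch
from PIL import Image

image = Image.fromarray(np.zeros((480, 640, 3), dtype=np.uint8))  # placeholder image
box = [100.0, 100.0, 300.0, 400.0]                                # placeholder xyxy box

inputs = sam_processor(image, input_boxes=[[box]], return_tensors="pt")
with torch.no_grad():
    outputs = sam_model(**inputs)

masks = sam_processor.image_processor.post_process_masks(
    outputs.pred_masks.cpu(),
    inputs["original_sizes"].cpu(),
    inputs["reshaped_input_sizes"].cpu(),
)[0]                                          # (num_boxes, 3, H, W) boolean masks
best_mask = masks[0][outputs.iou_scores[0, 0].argmax()]  # pick the highest-scoring candidate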
 
@@ -271,6 +424,98 @@ def generate_distinct_colors(n):
     return colors
 
 
+# ============================================================================
+# SegFormer segmentation (simplified approach)
+# ============================================================================
+
+def run_segformer_segmentation(image_np, device="cpu"):
+    """Semantic segmentation with SegFormer (simplest option, CPU friendly)."""
+    if segformer_model is None or segformer_processor is None:
+        print("❌ SegFormer model not loaded")
+        return [], []
+
+    try:
+        import torch
+        from PIL import Image
+
+        # Prepare the image
+        if image_np.dtype != np.uint8:
+            image_np = (image_np * 255).astype(np.uint8)
+        image_pil = Image.fromarray(image_np)
+
+        # Inference
+        inputs = segformer_processor(images=image_pil, return_tensors="pt")
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+
+        with torch.no_grad():
+            outputs = segformer_model(**inputs)
+
+        # Logits come out at 1/4 resolution; upsample so masks align with the input image
+        logits = torch.nn.functional.interpolate(outputs.logits, size=image_np.shape[:2], mode="bilinear", align_corners=False)  # (1, num_classes, H, W)
+        predicted_segmentation = logits.argmax(dim=1).squeeze().cpu().numpy()
+
+        # Build instance masks (split contiguous regions of the same class)
+        from scipy import ndimage
+
+        # Partial mapping of common ADE20K classes
+        ade20k_labels = {
+            5: "wall", 7: "floor", 11: "ceiling", 18: "window", 14: "door",
+            19: "table", 20: "chair", 22: "sofa", 23: "bed", 28: "cabinet",
+            34: "desk", 39: "lamp", 65: "television", 89: "shelf"
+        }
+
+        detections = []
+        masks = []
+
+        # Extract instances for each class
+        unique_labels = np.unique(predicted_segmentation)
+        for label_id in unique_labels:
+            if label_id == 0:  # skip background
+                continue
+
+            # Mask for this class
+            class_mask = (predicted_segmentation == label_id)
+
+            # Separate connected regions (distinct instances)
+            labeled_mask, num_features = ndimage.label(class_mask)
+
+            for instance_id in range(1, num_features + 1):
+                instance_mask = (labeled_mask == instance_id)
+                mask_area = instance_mask.sum()
+
+                # Filter out small regions
+                if mask_area < MIN_MASK_AREA:
+                    continue
+
+                # Compute the bounding box
+                rows, cols = np.where(instance_mask)
+                if len(rows) == 0:
+                    continue
+
+                y_min, y_max = rows.min(), rows.max()
+                x_min, x_max = cols.min(), cols.max()
+                bbox = [x_min, y_min, x_max, y_max]
+
+                # Class name lookup
+                label_name = ade20k_labels.get(int(label_id), f"object_{label_id}")
+
+                detections.append({
+                    'bbox': bbox,
+                    'label': label_name,
+                    'confidence': 0.9,  # SegFormer has no confidence score; use a fixed value
+                    'class_id': int(label_id)
+                })
+                masks.append(instance_mask)
+
+        return detections, masks
+
+    except Exception as e:
+        print(f"❌ SegFormer segmentation failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return [], []
+
+
 def run_grounding_dino_detection(image_np, text_prompt, device="cpu"):
     """Run GroundingDINO detection (CPU-optimized)"""
     if grounding_dino_model is None or grounding_dino_processor is None:
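The instance-splitting trick in run_segformer_segmentation is plain connected-component labelling applied to each semantic class mask. A self-contained illustration:

import numpy as np
from scipy import ndimage

# Two disjoint blobs of the same semantic class.
class_mask = np.zeros((8, 8), dtype=bool)
class_mask[1:3, 1:3] = True      # blob A (area 4)
class_mask[5:7, 4:8] = True      # blob B (area 8)

labeled_mask, num_features = ndimage.label(class_mask)
instances = [labeled_mask == i for i in range(1, num_features + 1)]
print(num_features, [int(m.sum()) for m in instances])   # 2 [4, 8]

The hand-written ade20k_labels dict covers only a few classes; the checkpoint's full id-to-name mapping is also available as segformer_model.config.id2label, which is worth cross-checking against the hard-coded ids.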
@@ -720,20 +965,38 @@ def run_model(
     if enable_segmentation:
         progress(0.1, desc="🎯 Loading segmentation models (CPU)...")
         print(f"\n{'='*70}")
-        print("🎯 Starting segmentation model loading...")
+        print(f"🎯 Starting segmentation model loading... (method: {SEGMENTATION_METHOD})")
         print(f"{'='*70}")
-        load_grounding_dino_model("cpu")  # segmentation runs on CPU
-        load_sam_model("cpu")  # MobileSAM runs well on CPU
-
-        # Verify the models loaded successfully
-        if grounding_dino_model is None:
-            print("❌ GroundingDINO model failed to load!")
-            raise RuntimeError("GroundingDINO model failed to load; check the network connection or model configuration")
-        if sam_predictor is None:
-            print("❌ SAM model failed to load!")
-            raise RuntimeError("SAM model failed to load; check the network connection or model configuration")
-
-        print(f"   All segmentation models loaded successfully")
+
+        if SEGMENTATION_METHOD == "segformer":
+            # Option 1: SegFormer (lightest, ~14MB, fastest)
+            print("📌 Using method: SegFormer (lightweight, no text prompt needed)")
+            load_segformer_model("cpu")
+            if segformer_model is None:
+                print("❌ SegFormer model failed to load!")
+                raise RuntimeError("SegFormer model failed to load; check the network connection")
+
+        elif SEGMENTATION_METHOD == "maskformer":
+            # Option 2: MaskFormer (medium, ~100MB)
+            print("📌 Using method: MaskFormer (instance segmentation)")
+            load_maskformer_model("cpu")
+            if maskformer_model is None:
+                print("❌ MaskFormer model failed to load!")
+                raise RuntimeError("MaskFormer model failed to load; check the network connection")
+
+        else:  # "grounding_sam"
+            # Option 3: GroundingDINO + SAM (strongest, ~110MB, requires a text prompt)
+            print("📌 Using method: GroundingDINO + SAM (text-prompt driven)")
+            load_grounding_dino_model("cpu")
+            load_sam_model("cpu")
+            if grounding_dino_model is None:
+                print("❌ GroundingDINO model failed to load!")
+                raise RuntimeError("GroundingDINO model failed to load; check the network connection")
+            if sam_predictor is None:
+                print("❌ SAM model failed to load!")
+                raise RuntimeError("SAM model failed to load; check the network connection")
+
+        print(f"✅ Segmentation models loaded successfully")
         print(f"{'='*70}\n")
 
     # Load images
@@ -807,13 +1070,14 @@
 
     # Segmentation processing
     segmented_glb = None
-    if enable_segmentation and grounding_dino_model is not None:
+    if enable_segmentation:
         progress(0.65, desc="🎯 Starting object segmentation...")
         print(f"\n{'='*70}")
-        print("🎯 Starting object segmentation...")
-        print(f"🔍 Detection prompt: {text_prompt[:100]}...")
-        print(f"📊 Confidence threshold: {GROUNDING_DINO_BOX_THRESHOLD}")
+        print(f"🎯 Starting object segmentation... (method: {SEGMENTATION_METHOD})")
         print(f"📐 Minimum mask area: {MIN_MASK_AREA} px")
+        if SEGMENTATION_METHOD == "grounding_sam":
+            print(f"🔍 Detection prompt: {text_prompt[:100]}...")
+            print(f"📊 Confidence threshold: {GROUNDING_DINO_BOX_THRESHOLD}")
         print(f"{'='*70}\n")
 
         all_view_detections = []
@@ -829,23 +1093,46 @@
             else:
                 ref_image_np = ref_image
 
-            detections = run_grounding_dino_detection(ref_image_np, text_prompt, "cpu")  # run detection on CPU
-            print(f"   ✓ Detected {len(detections)} objects")
-
-            if len(detections) > 0:
-                for i, det in enumerate(detections):
-                    print(f"      Object {i+1}: {det['label']} (confidence: {det['confidence']:.2f})")
-                boxes = [d['bbox'] for d in detections]
-                masks = run_sam_refinement(ref_image_np, boxes)
-
-                points3d = world_points_list[view_idx]
-
-                for det_idx, (det, mask) in enumerate(zip(detections, masks)):
-                    center_3d = compute_object_3d_center(points3d, mask)
-                    det['center_3d'] = center_3d
-                    det['mask_2d'] = mask
-
-                all_view_detections.append(detections)
+            # Choose the processing pipeline based on the segmentation method
+            if SEGMENTATION_METHOD == "segformer":
+                # SegFormer: direct semantic segmentation, no text prompt needed
+                detections, masks = run_segformer_segmentation(ref_image_np, "cpu")
+                print(f"   ✓ Detected {len(detections)} objects")
+
+                if len(detections) > 0:
+                    for i, det in enumerate(detections):
+                        print(f"      Object {i+1}: {det['label']}")
+
+                    points3d = world_points_list[view_idx]
+                    for det_idx, (det, mask) in enumerate(zip(detections, masks)):
+                        center_3d = compute_object_3d_center(points3d, mask)
+                        det['center_3d'] = center_3d
+                        det['mask_2d'] = mask
+
+                    all_view_detections.append(detections)
+                    all_view_masks.append(masks)
+                else:
+                    all_view_detections.append([])
+                    all_view_masks.append([])
+
+            elif SEGMENTATION_METHOD == "grounding_sam":
+                # GroundingDINO + SAM: text-prompt driven
+                detections = run_grounding_dino_detection(ref_image_np, text_prompt, "cpu")
+                print(f"   ✓ Detected {len(detections)} objects")
+
+                if len(detections) > 0:
+                    for i, det in enumerate(detections):
+                        print(f"      Object {i+1}: {det['label']} (confidence: {det['confidence']:.2f})")
+                    boxes = [d['bbox'] for d in detections]
+                    masks = run_sam_refinement(ref_image_np, boxes)
+
+                    points3d = world_points_list[view_idx]
+                    for det_idx, (det, mask) in enumerate(zip(detections, masks)):
+                        center_3d = compute_object_3d_center(points3d, mask)
+                        det['center_3d'] = center_3d
+                        det['mask_2d'] = mask
+
+                    all_view_detections.append(detections)
                     all_view_masks.append(masks)
                 else:
                     all_view_detections.append([])
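Both branches feed each 2D mask plus the per-view point map into compute_object_3d_center, which this commit does not touch and which is not shown in the diff. For readers following along, a helper with that contract typically reduces the masked 3D points to a single centre, roughly as in the hypothetical sketch below (the median reduction and validity filtering are assumptions, not the app's actual implementation):

import numpy as np

def compute_object_3d_center_sketch(points3d, mask_2d):
    """Hypothetical stand-in: points3d is (H, W, 3) world coordinates, mask_2d is (H, W) bool."""
    pts = points3d[mask_2d]                      # (N, 3) points belonging to the object
    pts = pts[np.isfinite(pts).all(axis=1)]      # drop invalid / unreconstructed points
    if len(pts) == 0:
        return None
    return np.median(pts, axis=0)                # robust to outliers at mask edges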
 