| | |
| | |
| |
|
| | |
| | |
| |
|
| | import torch |
| | import torch.nn as nn |
| | import torch.nn.functional as F |
| | from torchvision.transforms import Normalize, Resize, ToTensor |
| |
|
| |
|
| | class SAM2Transforms(nn.Module): |
| | def __init__( |
| | self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0 |
| | ): |
| | """ |
| | Transforms for SAM2. |
| | """ |
| | super().__init__() |
| | self.resolution = resolution |
| | self.mask_threshold = mask_threshold |
| | self.max_hole_area = max_hole_area |
| | self.max_sprinkle_area = max_sprinkle_area |
| | self.mean = [0.485, 0.456, 0.406] |
| | self.std = [0.229, 0.224, 0.225] |
| | self.to_tensor = ToTensor() |
| | self.transforms = torch.jit.script( |
| | nn.Sequential( |
| | Resize((self.resolution, self.resolution)), |
| | Normalize(self.mean, self.std), |
| | ) |
| | ) |
| |
|
| | def __call__(self, x): |
| | x = self.to_tensor(x) |
| | return self.transforms(x) |
| |
|
| | def forward_batch(self, img_list): |
| | img_batch = [self.transforms(self.to_tensor(img)) for img in img_list] |
| | img_batch = torch.stack(img_batch, dim=0) |
| | return img_batch |
| |
|
| | def transform_coords( |
| | self, coords: torch.Tensor, normalize=False, orig_hw=None |
| | ) -> torch.Tensor: |
| | """ |
| | Expects a torch tensor with length 2 in the last dimension. The coordinates can be in absolute image or normalized coordinates, |
| | If the coords are in absolute image coordinates, normalize should be set to True and original image size is required. |
| | |
| | Returns |
| | Un-normalized coordinates in the range of [0, 1] which is expected by the SAM2 model. |
| | """ |
| | if normalize: |
| | assert orig_hw is not None |
| | h, w = orig_hw |
| | coords = coords.clone() |
| | coords[..., 0] = coords[..., 0] / w |
| | coords[..., 1] = coords[..., 1] / h |
| |
|
| | coords = coords * self.resolution |
| | return coords |
| |
|
| | def transform_boxes( |
| | self, boxes: torch.Tensor, normalize=False, orig_hw=None |
| | ) -> torch.Tensor: |
| | """ |
| | Expects a tensor of shape Bx4. The coordinates can be in absolute image or normalized coordinates, |
| | if the coords are in absolute image coordinates, normalize should be set to True and original image size is required. |
| | """ |
| | boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw) |
| | return boxes |
| |
|
| | def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor: |
| | """ |
| | Perform PostProcessing on output masks. |
| | """ |
| | from sam2.utils.misc import get_connected_components |
| |
|
| | masks = masks.float() |
| | if self.max_hole_area > 0: |
| | |
| | |
| | mask_flat = masks.flatten(0, 1).unsqueeze(1) |
| | labels, areas = get_connected_components(mask_flat <= self.mask_threshold) |
| | is_hole = (labels > 0) & (areas <= self.max_hole_area) |
| | is_hole = is_hole.reshape_as(masks) |
| | |
| | masks = torch.where(is_hole, self.mask_threshold + 10.0, masks) |
| |
|
| | if self.max_sprinkle_area > 0: |
| | labels, areas = get_connected_components(mask_flat > self.mask_threshold) |
| | is_hole = (labels > 0) & (areas <= self.max_sprinkle_area) |
| | is_hole = is_hole.reshape_as(masks) |
| | |
| | masks = torch.where(is_hole, self.mask_threshold - 10.0, masks) |
| |
|
| | masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False) |
| | return masks |
| |
|