import random import torch import torchvision.transforms.functional as F def resize(image, target, size, max_size=None): # size can be min_size (scalar) or (w, h) tuple def get_size_with_aspect_ratio(image_size, size, max_size=None): w, h = image_size if max_size is not None: min_original_size = float(min((w, h))) max_original_size = float(max((w, h))) if max_original_size / min_original_size * size > max_size: size = int(round(max_size * min_original_size / max_original_size)) if (w <= h and w == size) or (h <= w and h == size): return (h, w) if w < h: ow = size oh = int(size * h / w) else: oh = size ow = int(size * w / h) return (oh, ow) def get_size(image_size, size, max_size=None): if isinstance(size, (list, tuple)): return size[::-1] else: return get_size_with_aspect_ratio(image_size, size, max_size) size = get_size(image.size, size, max_size) rescaled_image = F.resize(image, size) if target is None: return rescaled_image, None ratios = tuple( float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size) ) ratio_width, ratio_height = ratios target = target.copy() if "exampler_box" in target: boxes = target["exampler_box"] if isinstance(boxes, torch.Tensor): scaled_boxes = boxes * torch.as_tensor( [ratio_width, ratio_height, ratio_width, ratio_height] ) target["exampler_box"] = scaled_boxes elif isinstance(boxes, dict): for k, v in boxes.items(): scaled_boxes = v * torch.as_tensor( [ratio_width, ratio_height, ratio_width, ratio_height] ) target["exampler_box"][k] = scaled_boxes if "demo_pos_exampler_box" in target: boxes = target["demo_pos_exampler_box"] scaled_boxes = boxes * torch.as_tensor( [ratio_width, ratio_height, ratio_width, ratio_height] ) target["demo_pos_exampler_box"] = scaled_boxes if "demo_neg_exampler_box" in target: boxes = target["demo_neg_exampler_box"] scaled_boxes = boxes * torch.as_tensor( [ratio_width, ratio_height, ratio_width, ratio_height] ) target["demo_neg_exampler_box"] = scaled_boxes if "boxes" in target: boxes = target["boxes"] scaled_boxes = boxes * torch.as_tensor( [ratio_width, ratio_height, ratio_width, ratio_height] ) target["boxes"] = scaled_boxes if "area" in target: area = target["area"] scaled_area = area * (ratio_width * ratio_height) target["area"] = scaled_area h, w = size target["size"] = torch.tensor([h, w]) return rescaled_image, target class RandomResize(object): def __init__(self, sizes, max_size=None): assert isinstance(sizes, (list, tuple)) self.sizes = sizes self.max_size = max_size def __call__(self, img, target=None): size = random.choice(self.sizes) return resize(img, target, size, self.max_size) class ToTensor(object): def __call__(self, img, target): return F.to_tensor(img), target class Normalize(object): def __init__(self, mean, std): self.mean = mean self.std = std def __call__(self, image, target=None): image = F.normalize(image, mean=self.mean, std=self.std) if target is None: return image, None target = target.copy() h, w = image.shape[-2:] return image, target class Compose(object): def __init__(self, transforms): self.transforms = transforms def __call__(self, image, target): for t in self.transforms: image, target = t(image, target) return image, target def __repr__(self): format_string = self.__class__.__name__ + "(" for t in self.transforms: format_string += "\n" format_string += " {0}".format(t) format_string += "\n)" return format_string