Spaces:
Sleeping
Sleeping
| """ | |
| common functions for image operations | |
| """ | |
| import cv2 | |
| import numpy as np | |
def crop(img, center, crop_size):
    """
    crop image around the given center, pad zeros for borders
    :param img: np array of shape (H, W) or (H, W, C)
    :param center: (x, y) of the crop center, np array (any sequence of two numbers is accepted)
    :param crop_size: np array or a float size of the resulting crop
    :return: a new array holding the crop around the center; every pixel that
        falls outside the image is zero
    :raises NotImplementedError: if img is neither 2- nor 3-dimensional
    """
    assert isinstance(img, np.ndarray)
    if img.ndim not in (2, 3):
        raise NotImplementedError(
            "crop only supports 2D or 3D arrays, got {} dimensions".format(img.ndim))
    h, w = img.shape[:2]

    # crop window in image coordinates: start inclusive, end exclusive
    center = np.asarray(center)
    topleft = np.round(center - crop_size / 2).astype(int)
    bottom_right = np.round(center + crop_size / 2).astype(int)
    x1, y1 = int(topleft[0]), int(topleft[1])
    x2, y2 = int(bottom_right[0]), int(bottom_right[1])

    # start from an all-zero canvas of the requested size, so the padding is implicit
    out_shape = (max(0, y2 - y1), max(0, x2 - x1)) + img.shape[2:]
    padded = np.zeros(out_shape, dtype=img.dtype)

    # part of the window that overlaps the image; the end is clamped to w / h
    # (not w - 1 / h - 1) because slice ends are exclusive, otherwise the last
    # image row and column would never be copied
    sx1, sx2 = max(0, x1), min(w, x2)
    sy1, sy2 = max(0, y1), min(h, y2)

    # copy only when there is an overlap; this also keeps negative window
    # coordinates from being interpreted as "index from the end"
    if sx1 < sx2 and sy1 < sy2:
        padded[sy1 - y1:sy2 - y1, sx1 - x1:sx2 - x1] = img[sy1:sy2, sx1:sx2]
    return padded
def resize(img, img_size, mode=cv2.INTER_LINEAR):
    """
    resize image to the given target size; the aspect ratio must be preserved
    :param img: image array, the first two dimensions are (height, width)
    :param img_size: (width, height) of the target image size
    :param mode: interpolation flag passed to cv2.resize
    :return: the resized image as returned by cv2.resize
    """
    src_h, src_w = img.shape[:2]
    dst_w, dst_h = img_size[0], img_size[1]
    src_aspect = 1.0 * src_w / src_h
    dst_aspect = 1.0 * dst_w / dst_h
    # the comparison is exact: any mismatch in width/height ratio is rejected
    assert src_aspect == dst_aspect, \
        "image aspect ration not matching, given image: {}, net input: {}".format(img.shape, img_size)
    return cv2.resize(img, img_size, interpolation=mode)
def masks2bbox(masks, threshold=127):
    """
    compute the bounding box enclosing the foreground of all given masks
    :param masks: a non-empty sequence of mask arrays; a pixel counts as
        foreground if its value is above the threshold in at least one mask
    :param threshold: values strictly greater than this are foreground
    :return: bounding box corner coordinate, (bmin, bmax), each an np array (x, y)
    """
    # union of the thresholded masks
    foreground = np.zeros_like(masks[0], dtype=bool)
    for mask in masks:
        foreground = np.logical_or(foreground, mask > threshold)

    # np.nonzero returns (row, column) indices, i.e. (y, x)
    rows, cols = np.nonzero(foreground)
    bmin = np.array([cols.min(), rows.min()])
    bmax = np.array([cols.max(), rows.max()])
    return bmin, bmax
def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5):
    """
    solve a 4x4 linear system for the translation that projects a gaussian
    at the origin onto the given crop

    Parameters
    ----------
    crop_center: (x, y) of the crop center
    crop_size: float, the size of the square crop
    is_behave: if True use the first set of hard-coded camera intrinsics,
        otherwise the second set (labelled "intercap camera")
    std_coverage: which edge point should be projected back to the edge of the 2d crop

    Returns
    -------
    the estimated translation, a flat array with the first three components
    of the solution
    """
    # predefined camera intrinsics
    if is_behave:
        fx, fy, cx, cy = 979.7844, 979.840, 1018.952, 779.486
    else:
        # intercap camera
        fx, fy = 918.457763671875, 918.4373779296875
        cx, cy = 956.9661865234375, 555.944580078125

    center_x, center_y = crop_center
    # point on the crop edge, half a crop size to the right of the center
    edge_x, edge_y = center_x + crop_size / 2, center_y

    # Rows 1-2: origin (0, 0, 0) is projected to the crop center
    # Rows 3-4: edge point (std_coverage, 0, z) is projected to the edge of crop
    # NOTE(review): `fx - edge_x` in row 3 looks like it may have been meant
    # as `cx - edge_x` (row 4 uses `cy - edge_y`). It is kept as is because
    # changing it changes the returned translation -- confirm before touching.
    system = np.array([
        [fx, 0, cx - center_x, cx - center_x],
        [0, fy, cy - center_y, cy - center_y],
        [fx, 0, fx - edge_x, 0],
        [0, fy, cy - edge_y, 0],
    ])
    rhs = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1))  # 3.5->half of 7.0

    # the system is inverted directly; it is expected to be full rank
    solution = np.linalg.inv(system) @ rhs
    return solution.flatten()[:3]