|
|
from typing import List, Optional, Tuple |
|
|
|
|
|
import cv2 |
|
|
import numpy as np |
|
|
import numpy.typing as npt |
|
|
from PIL import ImageColor |
|
|
import matplotlib.pyplot as plt |
|
|
from pyquaternion import Quaternion |
|
|
|
|
|
from navsim.common.dataclasses import Camera, Lidar, Annotations |
|
|
from navsim.common.enums import LidarIndex, BoundingBoxIndex |
|
|
from navsim.visualization.config import AGENT_CONFIG |
|
|
from navsim.visualization.lidar import filter_lidar_pc, get_lidar_pc_color |
|
|
from navsim.planning.scenario_builder.navsim_scenario_utils import tracked_object_types |
|
|
|
|
|
|
|
|
def add_camera_ax(ax: plt.Axes, camera: Camera) -> plt.Axes:
    """
    Renders the raw camera image onto a matplotlib axis.
    :param ax: matplotlib ax object
    :param camera: navsim camera dataclass
    :return: ax object with image
    """
    frame = camera.image
    ax.imshow(frame)
    return ax
|
|
|
|
|
|
|
|
def add_lidar_to_camera_ax(ax: plt.Axes, camera: Camera, lidar: Lidar) -> plt.Axes:
    """
    Projects the lidar point cloud into the camera view and draws it on the image.
    :param ax: matplotlib ax object
    :param camera: navsim camera dataclass
    :param lidar: navsim lidar dataclass
    :return: ax object with image
    """
    frame = camera.image.copy()
    height, width = frame.shape[:2]

    # Filter the cloud first, then derive one color per remaining point.
    point_cloud = filter_lidar_pc(lidar.lidar_pc.copy())
    point_colors = np.array(get_lidar_pc_color(point_cloud))

    pixels, in_fov = _transform_pcs_to_images(
        point_cloud,
        camera.sensor2lidar_rotation,
        camera.sensor2lidar_translation,
        camera.intrinsics,
        img_shape=(height, width),
    )

    # Draw only the points that project inside the image bounds.
    for (u, v), rgb in zip(pixels[in_fov], point_colors[in_fov]):
        circle_color = (int(rgb[0]), int(rgb[1]), int(rgb[2]))
        cv2.circle(frame, (int(u), int(v)), 5, circle_color, -1)

    ax.imshow(frame)
    return ax
|
|
|
|
|
|
|
|
def add_annotations_to_camera_ax(ax: plt.Axes, camera: Camera, annotations: Annotations) -> plt.Axes:
    """
    Draws the annotated 3D bounding boxes on top of the camera image.
    :param ax: matplotlib ax object
    :param camera: navsim camera dataclass
    :param annotations: navsim annotations dataclass
    :return: ax object with image
    """
    labels = annotations.names
    boxes_cam = _transform_annotations_to_camera(
        annotations.boxes,
        camera.sensor2lidar_rotation,
        camera.sensor2lidar_translation,
    )
    centers = boxes_cam[:, BoundingBoxIndex.POSITION]
    extents = boxes_cam[:, BoundingBoxIndex.DIMENSION]
    headings = boxes_cam[:, BoundingBoxIndex.HEADING]

    # Unit-cube corner template: 8 binary corner coordinates, reordered into the
    # edge-drawing convention and centered around the origin.
    template = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)
    template = template[[0, 1, 3, 2, 4, 5, 7, 6]] - np.array([0.5, 0.5, 0.5])

    # Scale the template per box, rotate by heading, then translate to the center.
    corners = extents.reshape([-1, 1, 3]) * template.reshape([1, 8, 3])
    corners = _rotation_3d_in_axis(corners, headings, axis=1) + centers.reshape(-1, 1, 3)

    # Project corners to pixels; keep boxes with at least one corner in view.
    pixels, in_fov = _transform_points_to_image(corners.reshape(-1, 3), camera.intrinsics)
    pixels = pixels.reshape(-1, 8, 2)
    visible = in_fov.reshape(-1, 8).any(-1)

    image = _plot_rect_3d_on_img(camera.image.copy(), pixels[visible], labels[visible])
    ax.imshow(image)
    return ax
|
|
|
|
|
|
|
|
def _transform_annotations_to_camera(
    boxes: npt.NDArray[np.float32],
    sensor2lidar_rotation: npt.NDArray[np.float32],
    sensor2lidar_translation: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]:
    """
    Helper function to transform bounding boxes from lidar frame into camera frame
    TODO: Refactor
    :param boxes: array representation of bounding boxes, shape (N, box_dim)
    :param sensor2lidar_rotation: camera-to-lidar rotation matrix (3, 3)
    :param sensor2lidar_translation: camera-to-lidar translation vector (3,)
    :return: bounding boxes in camera coordinates (position, dims, heading)
    """

    # Split positions and heading. NOTE(review): the trailing slice after HEADING
    # keeps `rots` two-dimensional (N, 1) so the final concatenate works —
    # presumably HEADING is the last box index; verify against BoundingBoxIndex.
    locs, rots = (
        boxes[:, BoundingBoxIndex.POSITION],
        boxes[:, BoundingBoxIndex.HEADING :],
    )
    # Reorder extents to (length, height, width) for the camera-frame convention.
    dims_cam = boxes[
        :, [BoundingBoxIndex.LENGTH, BoundingBoxIndex.HEIGHT, BoundingBoxIndex.WIDTH]
    ]

    # Convert each lidar-frame yaw (rotation about lidar z) into a camera-frame
    # heading by composing with the inverse camera rotation quaternion.
    rots_cam = np.zeros_like(rots)
    for idx, rot in enumerate(rots):
        rot = Quaternion(axis=[0, 0, 1], radians=rot)
        rot = Quaternion(matrix=sensor2lidar_rotation).inverse * rot
        rots_cam[idx] = -rot.yaw_pitch_roll[0]

    # Build the lidar -> camera rigid transform as a 4x4 matrix in the
    # row-vector convention (points are multiplied on the left below).
    lidar2cam_r = np.linalg.inv(sensor2lidar_rotation)
    lidar2cam_t = sensor2lidar_translation @ lidar2cam_r.T
    lidar2cam_rt = np.eye(4)
    lidar2cam_rt[:3, :3] = lidar2cam_r.T
    lidar2cam_rt[3, :3] = -lidar2cam_t

    # Transform homogeneous box centers, then drop the homogeneous component.
    locs_cam = np.concatenate([locs, np.ones_like(locs)[:, :1]], -1)
    locs_cam = lidar2cam_rt.T @ locs_cam.T
    locs_cam = locs_cam.T
    locs_cam = locs_cam[:, :-1]
    return np.concatenate([locs_cam, dims_cam, rots_cam], -1)
|
|
|
|
|
|
|
|
def _rotation_3d_in_axis(points: npt.NDArray[np.float32], angles: npt.NDArray[np.float32], axis: int = 0): |
|
|
""" |
|
|
Rotate 3D points by angles according to axis. |
|
|
TODO: Refactor |
|
|
:param points: array of points |
|
|
:param angles: array of angles |
|
|
:param axis: axis to perform rotation, defaults to 0 |
|
|
:raises value: _description_ |
|
|
:raises ValueError: if axis invalid |
|
|
:return: rotated points |
|
|
""" |
|
|
rot_sin = np.sin(angles) |
|
|
rot_cos = np.cos(angles) |
|
|
ones = np.ones_like(rot_cos) |
|
|
zeros = np.zeros_like(rot_cos) |
|
|
if axis == 1: |
|
|
rot_mat_T = np.stack( |
|
|
[ |
|
|
np.stack([rot_cos, zeros, -rot_sin]), |
|
|
np.stack([zeros, ones, zeros]), |
|
|
np.stack([rot_sin, zeros, rot_cos]), |
|
|
] |
|
|
) |
|
|
elif axis == 2 or axis == -1: |
|
|
rot_mat_T = np.stack( |
|
|
[ |
|
|
np.stack([rot_cos, -rot_sin, zeros]), |
|
|
np.stack([rot_sin, rot_cos, zeros]), |
|
|
np.stack([zeros, zeros, ones]), |
|
|
] |
|
|
) |
|
|
elif axis == 0: |
|
|
rot_mat_T = np.stack( |
|
|
[ |
|
|
np.stack([zeros, rot_cos, -rot_sin]), |
|
|
np.stack([zeros, rot_sin, rot_cos]), |
|
|
np.stack([ones, zeros, zeros]), |
|
|
] |
|
|
) |
|
|
else: |
|
|
raise ValueError(f"axis should in range [0, 1, 2], got {axis}") |
|
|
return np.einsum("aij,jka->aik", points, rot_mat_T) |
|
|
|
|
|
|
|
|
def _plot_rect_3d_on_img(
    image: npt.NDArray[np.float32],
    box_corners: npt.NDArray[np.float32],
    box_labels: List[str],
    thickness: int = 3,
) -> npt.NDArray[np.uint8]:
    """
    Plot the boundary lines of 3D rectangular on 2D images.
    TODO: refactor
    :param image: The numpy array of image.
    :param box_corners: Coordinates of the corners of 3D, shape of [N, 8, 2].
    :param box_labels: labels of boxes for coloring
    :param thickness: pixel width of lines, defaults to 3
    :return: image with 3D bounding boxes
    """
    # Corner-index pairs forming the 12 edges of a cuboid (matches the corner
    # ordering produced in add_annotations_to_camera_ax).
    line_indices = (
        (0, 1),
        (0, 3),
        (0, 4),
        (1, 2),
        (1, 5),
        (3, 2),
        (3, 7),
        (4, 5),
        (4, 7),
        (2, 6),
        (5, 6),
        (6, 7),
    )
    for i in range(len(box_corners)):
        # Look up the drawing color from the agent config via the object type.
        layer = tracked_object_types[box_labels[i]]
        color = ImageColor.getcolor(AGENT_CONFIG[layer]["fill_color"], "RGB")
        # BUGFIX: `np.int` was removed in NumPy 1.24 (it was an alias for the
        # builtin `int`); use `int` directly so this works on modern NumPy.
        corners = box_corners[i].astype(int)
        for start, end in line_indices:
            cv2.line(
                image,
                (corners[start, 0], corners[start, 1]),
                (corners[end, 0], corners[end, 1]),
                color,
                thickness,
                cv2.LINE_AA,
            )
    return image.astype(np.uint8)
|
|
|
|
|
|
|
|
def _transform_points_to_image( |
|
|
points: npt.NDArray[np.float32], |
|
|
intrinsic: npt.NDArray[np.float32], |
|
|
image_shape: Optional[Tuple[int, int]] = None, |
|
|
eps: float = 1e-3, |
|
|
) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.bool_]]: |
|
|
""" |
|
|
Transforms points in camera frame to image pixel coordinates |
|
|
TODO: refactor |
|
|
:param points: points in camera frame |
|
|
:param intrinsic: camera intrinsics |
|
|
:param image_shape: shape of image in pixel |
|
|
:param eps: lower threshold of points, defaults to 1e-3 |
|
|
:return: points in pixel coordinates, mask of values in frame |
|
|
""" |
|
|
points = points[:, :3] |
|
|
|
|
|
viewpad = np.eye(4) |
|
|
viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic |
|
|
|
|
|
pc_img = np.concatenate([points, np.ones_like(points)[:, :1]], -1) |
|
|
pc_img = viewpad @ pc_img.T |
|
|
pc_img = pc_img.T |
|
|
|
|
|
cur_pc_in_fov = pc_img[:, 2] > eps |
|
|
pc_img = pc_img[..., 0:2] / np.maximum(pc_img[..., 2:3], np.ones_like(pc_img[..., 2:3]) * eps) |
|
|
if image_shape is not None: |
|
|
img_h, img_w = image_shape |
|
|
cur_pc_in_fov = ( |
|
|
cur_pc_in_fov |
|
|
& (pc_img[:, 0] < (img_w - 1)) |
|
|
& (pc_img[:, 0] > 0) |
|
|
& (pc_img[:, 1] < (img_h - 1)) |
|
|
& (pc_img[:, 1] > 0) |
|
|
) |
|
|
return pc_img, cur_pc_in_fov |
|
|
|
|
|
|
|
|
def _transform_pcs_to_images(
    lidar_pc: npt.NDArray[np.float32],
    sensor2lidar_rotation: npt.NDArray[np.float32],
    sensor2lidar_translation: npt.NDArray[np.float32],
    intrinsic: npt.NDArray[np.float32],
    img_shape: Optional[Tuple[int, int]] = None,
    eps: float = 1e-3,
) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.bool_]]:
    """
    Projects a lidar point cloud into image pixel coordinates
    TODO: refactor
    :param lidar_pc: lidar point cloud
    :param sensor2lidar_rotation: camera-to-lidar rotation matrix (3, 3)
    :param sensor2lidar_translation: camera-to-lidar translation vector (3,)
    :param intrinsic: camera intrinsics
    :param img_shape: image shape in pixels, defaults to None
    :param eps: minimum depth for a point to count as in front of the camera, defaults to 1e-3
    :return: lidar pc in pixel coordinates, mask of values in frame
    """
    # Point positions as (N, 3); LidarIndex.POSITION selects the xyz channels.
    pc_xyz = lidar_pc[LidarIndex.POSITION, :].T

    # Build the lidar -> camera rigid transform as a 4x4 matrix in the
    # row-vector convention (same construction as _transform_annotations_to_camera).
    lidar2cam_r = np.linalg.inv(sensor2lidar_rotation)
    lidar2cam_t = sensor2lidar_translation @ lidar2cam_r.T
    lidar2cam_rt = np.eye(4)
    lidar2cam_rt[:3, :3] = lidar2cam_r.T
    lidar2cam_rt[3, :3] = -lidar2cam_t

    # Embed the intrinsics into a 4x4 matrix and fuse with the extrinsics into
    # a single lidar -> image projection.
    viewpad = np.eye(4)
    viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic
    lidar2img_rt = viewpad @ lidar2cam_rt.T

    # Project homogeneous points (x, y, z, 1) into the image plane.
    cur_pc_xyz = np.concatenate([pc_xyz, np.ones_like(pc_xyz)[:, :1]], -1)
    cur_pc_cam = lidar2img_rt @ cur_pc_xyz.T
    cur_pc_cam = cur_pc_cam.T
    # A point is in front of the camera when its depth exceeds eps; the divisor
    # is clamped to eps so points at/behind the camera do not divide by zero.
    cur_pc_in_fov = cur_pc_cam[:, 2] > eps
    cur_pc_cam = cur_pc_cam[..., 0:2] / np.maximum(cur_pc_cam[..., 2:3], np.ones_like(cur_pc_cam[..., 2:3]) * eps)

    # Optionally restrict the mask to pixels strictly inside the image bounds.
    if img_shape is not None:
        img_h, img_w = img_shape
        cur_pc_in_fov = (
            cur_pc_in_fov
            & (cur_pc_cam[:, 0] < (img_w - 1))
            & (cur_pc_cam[:, 0] > 0)
            & (cur_pc_cam[:, 1] < (img_h - 1))
            & (cur_pc_cam[:, 1] > 0)
        )
    return cur_pc_cam, cur_pc_in_fov
|
|
|