######################################################################
# Copyright (c) 2022 OpenPerceptionX. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
######################################################################
######################################################################
# This file contains the concrete implementations of the data augmentation
# methods used in transforms.py.
######################################################################

from typing import List, Tuple

import cv2
import numpy as np

# Available interpolation modes (OpenCV)
cv2_interp_codes = {
    'nearest': cv2.INTER_NEAREST,
    'bilinear': cv2.INTER_LINEAR,
    'bicubic': cv2.INTER_CUBIC,
    'area': cv2.INTER_AREA,
    'lanczos': cv2.INTER_LANCZOS4
}


def scale_image_multiple_view(
        imgs: List[np.ndarray],
        cam_intrinsics: List[np.ndarray],
        # cam_extrinsics: List[np.ndarray],
        lidar2img: List[np.ndarray],
        rand_scale: float,
        interpolation='bilinear'
) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
    """Resize the multiple-view images with the same, randomly selected scale.

    Notably used in :class:`.transforms.RandomScaleImageMultiViewImage_naive`.

    Args:
        imgs (list of numpy.array): Multiple-view images to be resized.
            len(imgs) is the number of cameras; each image has shape [H, W, 3].
        cam_intrinsics (list of numpy.array): Intrinsic parameters of the
            different cameras (camera-to-image transformations).
            len(cam_intrinsics) is the number of cameras; each matrix has
            shape 4 x 4.
        lidar2img (list of numpy.array): Transformations from lidar to images.
            len(lidar2img) is the number of cameras; each matrix has
            shape 4 x 4.
        rand_scale (float): Resize ratio.
        interpolation (str): Interpolation mode for OpenCV.

    Returns:
        imgs_new (list of numpy.array): Updated multiple-view images.
        cam_intrinsics_new (list of numpy.array): Updated intrinsic parameters
            of the different cameras.
        lidar2img_new (list of numpy.array): Updated transformations from
            lidar to images.
    """
    y_size = [int(img.shape[0] * rand_scale) for img in imgs]
    x_size = [int(img.shape[1] * rand_scale) for img in imgs]
    # A 4 x 4 scaling matrix applied on the left of the projection matrices.
    scale_factor = np.eye(4)
    scale_factor[0, 0] *= rand_scale
    scale_factor[1, 1] *= rand_scale
    imgs_new = [
        cv2.resize(img, (x_size[idx], y_size[idx]),
                   interpolation=cv2_interp_codes[interpolation])
        for idx, img in enumerate(imgs)
    ]
    cam_intrinsics_new = [
        scale_factor @ cam_intrinsic for cam_intrinsic in cam_intrinsics
    ]
    lidar2img_new = [scale_factor @ l2i for l2i in lidar2img]
    return imgs_new, cam_intrinsics_new, lidar2img_new


def horizontal_flip_image_multiview(
        imgs: List[np.ndarray]) -> List[np.ndarray]:
    """Flip every image horizontally.

    Args:
        imgs (list of numpy.array): Multiple-view images to be flipped.
            len(imgs) is the number of cameras; each image has shape [H, W, 3].

    Returns:
        imgs_new (list of numpy.array): Flipped multiple-view images.
    """
    imgs_new = [np.flip(img, axis=1) for img in imgs]
    return imgs_new


def vertical_flip_image_multiview(imgs: List[np.ndarray]) -> List[np.ndarray]:
    """Flip every image vertically.

    Args:
        imgs (list of numpy.array): Multiple-view images to be flipped.
            len(imgs) is the number of cameras; each image has shape [H, W, 3].

    Returns:
        imgs_new (list of numpy.array): Flipped multiple-view images.
    """
    imgs_new = [np.flip(img, axis=0) for img in imgs]
    return imgs_new


def horizontal_flip_bbox(bboxes_3d: np.ndarray, dataset: str) -> np.ndarray:
    """Flip bounding boxes horizontally.

    Args:
        bboxes_3d (np.ndarray): Bounding boxes of shape [N, 7], where N is the
            number of objects. For 'nuScenes' the boxes are expected to be a
            box structure exposing a ``.tensor`` attribute (e.g. an mmdet3d
            ``LiDARInstance3DBoxes``); for 'waymo' a plain array.
        dataset (str): 'waymo' coordinate system or 'nuScenes' coordinate
            system.

    Returns:
        bboxes_3d (numpy.array): Flipped bounding boxes.
    """
    if dataset == 'nuScenes':
        # Negate x and yaw.
        bboxes_3d.tensor[:, 0::7] = -bboxes_3d.tensor[:, 0::7]
        bboxes_3d.tensor[:, 6] = -bboxes_3d.tensor[:, 6]  # + np.pi
    elif dataset == 'waymo':
        # Negate y and mirror yaw.
        bboxes_3d[:, 1::7] = -bboxes_3d[:, 1::7]
        bboxes_3d[:, 6] = -bboxes_3d[:, 6] + np.pi
    return bboxes_3d


def horizontal_flip_cam_params(
        img_shape: np.ndarray,
        cam_intrinsics: List[np.ndarray],
        cam_extrinsics: List[np.ndarray],
        lidar2imgs: List[np.ndarray],
        dataset: str
) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
    """Flip camera parameters horizontally.

    Args:
        img_shape (numpy.array): Image shape [H, W, C]; only the width
            img_shape[1] is used.
        cam_intrinsics (list of numpy.array): Intrinsic parameters of the
            different cameras (camera-to-image transformations).
            len(cam_intrinsics) is the number of cameras; each matrix has
            shape 4 x 4.
        cam_extrinsics (list of numpy.array): Extrinsic parameters of the
            different cameras (lidar-to-camera transformations).
            len(cam_extrinsics) is the number of cameras; each matrix has
            shape 4 x 4.
        lidar2imgs (list of numpy.array): Transformations from lidar to
            images. len(lidar2imgs) is the number of cameras; each matrix has
            shape 4 x 4. The input values are ignored and recomputed from the
            flipped intrinsics and extrinsics.
        dataset (str): Specify the 'waymo' or 'nuScenes' coordinate system.

    Returns:
        cam_intrinsics (list of numpy.array): Updated intrinsic parameters of
            the different cameras.
        cam_extrinsics (list of numpy.array): Updated extrinsic parameters of
            the different cameras.
        lidar2imgs (list of numpy.array): Updated transformations from lidar
            to images.
    """
    flip_factor = np.eye(4)
    # Rebuild lidar2img from the flipped intrinsics and extrinsics.
    lidar2imgs = []
    w = img_shape[1]
    if dataset == 'nuScenes':
        flip_factor[0, 0] = -1
        cam_extrinsics = [l2c @ flip_factor for l2c in cam_extrinsics]
        for cam_intrinsic, l2c in zip(cam_intrinsics, cam_extrinsics):
            cam_intrinsic[0, 0] = -cam_intrinsic[0, 0]
            cam_intrinsic[0, 2] = w - cam_intrinsic[0, 2]
            lidar2imgs.append(cam_intrinsic @ l2c)
    elif dataset == 'waymo':
        flip_factor[1, 1] = -1
        cam_extrinsics = [l2c @ flip_factor for l2c in cam_extrinsics]
        for cam_intrinsic, l2c in zip(cam_intrinsics, cam_extrinsics):
            cam_intrinsic[0, 0] = -cam_intrinsic[0, 0]
            cam_intrinsic[0, 2] = w - cam_intrinsic[0, 2]
            lidar2imgs.append(cam_intrinsic @ l2c)
    else:
        raise NotImplementedError(f'{dataset} dataset is not supported')
    return cam_intrinsics, cam_extrinsics, lidar2imgs


def horizontal_flip_canbus(canbus: np.ndarray, dataset: str) -> np.ndarray:
    """Flip CAN bus signals horizontally.

    Args:
        canbus (numpy.ndarray): CAN bus signals of shape [18,].
        dataset (str): 'waymo' or 'nuScenes'.

    Returns:
        canbus_new (numpy.array): Flipped CAN bus signals.
""" if dataset == 'nuScenes': # results['canbus'][1] = -results['canbus'][1] # flip location # results['canbus'][-2] = -results['canbus'][-2] # flip direction canbus[-1] = -canbus[-1] # flip direction elif dataset == 'waymo': # results['canbus'][1] = -results['canbus'][-1] # flip location # results['canbus'][-2] = -results['canbus'][-2] # flip direction canbus[-1] = -canbus[-1] # flip direction else: raise NotImplementedError((f'Not support {dataset} dataset')) return canbus