diff --git a/mmcv/transforms/loading.py b/mmcv/transforms/loading.py index 0c27ceb01..2fd0f4630 100644 --- a/mmcv/transforms/loading.py +++ b/mmcv/transforms/loading.py @@ -111,7 +111,7 @@ class LoadAnnotation(BaseTransform): } ] # Filename of semantic or panoptic segmentation ground truth file. - 'seg_map': 'a/b/c' + 'seg_map_path': 'a/b/c' } After this module, the annotation has been changed to the format below: @@ -125,7 +125,7 @@ class LoadAnnotation(BaseTransform): # In int type. 'gt_bboxes_labels': np.ndarray(N, ) # In uint8 type. - 'gt_semantic_seg': np.ndarray (H, W) + 'gt_seg_map': np.ndarray (H, W) # in (x, y, v) order, float type. 'gt_keypoints': np.ndarray(N, NK, 3) } @@ -138,13 +138,13 @@ class LoadAnnotation(BaseTransform): - bbox_label - keypoints (optional) - - seg_map (optional) + - seg_map_path (optional) Added Keys: - gt_bboxes - gt_bboxes_labels - - gt_semantic_seg + - gt_seg_map - gt_keypoints Args: @@ -154,8 +154,8 @@ class LoadAnnotation(BaseTransform): Defaults to True. with_seg (bool): Whether to parse and load the semantic segmentation annotation. Defaults to False. - with_kps (bool): Whether to parse and load the keypoints annotation. - Defaults to False. + with_keypoints (bool): Whether to parse and load the keypoints + annotation. Defaults to False. imdecode_backend (str): The image decoding backend type. The backend argument for :func:``mmcv.imfrombytes``. See :fun:``mmcv.imfrombytes`` for details. 
@@ -170,7 +170,7 @@ class LoadAnnotation(BaseTransform): with_bbox: bool = True, with_label: bool = True, with_seg: bool = False, - with_kps: bool = False, + with_keypoints: bool = False, imdecode_backend: str = 'cv2', file_client_args: dict = dict(backend='disk') ) -> None: @@ -178,7 +178,7 @@ class LoadAnnotation(BaseTransform): self.with_bbox = with_bbox self.with_label = with_label self.with_seg = with_seg - self.with_kps = with_kps + self.with_keypoints = with_keypoints self.imdecode_backend = imdecode_backend self.file_client_args = file_client_args.copy() self.file_client = mmcv.FileClient(**self.file_client_args) @@ -210,7 +210,7 @@ class LoadAnnotation(BaseTransform): gt_bboxes_labels.append(instance['bbox_label']) results['gt_bboxes_labels'] = np.array(gt_bboxes_labels) - def _load_semantic_seg(self, results: dict) -> None: + def _load_seg_map(self, results: dict) -> None: """Private function to load semantic segmentation annotations. Args: @@ -220,8 +220,8 @@ class LoadAnnotation(BaseTransform): dict: The dict contains loaded semantic segmentation annotations. 
""" - img_bytes = self.file_client.get(results['seg_map']) - results['gt_semantic_seg'] = mmcv.imfrombytes( + img_bytes = self.file_client.get(results['seg_map_path']) + results['gt_seg_map'] = mmcv.imfrombytes( img_bytes, flag='unchanged', backend=self.imdecode_backend).squeeze() @@ -255,8 +255,8 @@ class LoadAnnotation(BaseTransform): if self.with_label: self._load_labels(results) if self.with_seg: - self._load_semantic_seg(results) - if self.with_kps: + self._load_seg_map(results) + if self.with_keypoints: self._load_kps(results) return results @@ -265,7 +265,7 @@ class LoadAnnotation(BaseTransform): repr_str += f'(with_bbox={self.with_bbox}, ' repr_str += f'with_label={self.with_label}, ' repr_str += f'with_seg={self.with_seg}, ' - repr_str += f'with_kps={self.with_kps}, ' + repr_str += f'with_keypoints={self.with_keypoints}, ' repr_str += f"imdecode_backend='{self.imdecode_backend}', " repr_str += f'file_client_args={self.file_client_args})' return repr_str diff --git a/mmcv/transforms/processing.py b/mmcv/transforms/processing.py index ceeb1e768..183e5cbe2 100644 --- a/mmcv/transforms/processing.py +++ b/mmcv/transforms/processing.py @@ -23,6 +23,10 @@ class Normalize(BaseTransform): - img + Modified Keys: + + - img + Added Keys: - img_norm_cfg @@ -38,12 +42,12 @@ class Normalize(BaseTransform): to_rgb (bool): Whether to convert the image from BGR to RGB before normlizing the image. If ``to_rgb=True``, the order of mean and std should be RGB. If ``to_rgb=False``, the order of mean and std - should be BGR. Defaults to True. + should be the same order as the image. Defaults to True. 
""" def __init__(self, - mean: Sequence[float], - std: Sequence[float], + mean: Sequence[Number], + std: Sequence[Number], to_rgb: bool = True) -> None: self.mean = np.array(mean, dtype=np.float32) self.std = np.array(std, dtype=np.float32) @@ -120,7 +124,7 @@ class Resize(BaseTransform): interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' backend, "nearest", "bilinear" for 'pillow' backend. Defaults - to 'cv2'. + to 'bilinear'. """ def __init__(self, @@ -275,7 +279,6 @@ class Pad(BaseTransform): Required Keys: - img - - gt_bboxes (optional) - gt_semantic_seg (optional) Modified Keys: @@ -298,9 +301,15 @@ class Pad(BaseTransform): None. pad_to_square (bool): Whether to pad the image into a square. Currently only used for YOLOX. Defaults to False. - pad_val (int or dict): A dict for padding value. - if ``type(pad_val) == int``, the val to pad seg is 255. Defaults to - ``dict(img=0, seg=255)``. + pad_val (Number | dict[str, Number], optional): Padding value used + when padding_mode is "constant". If it is a single number, the + image is padded with that number and the semantic segmentation + map is padded with 255. If it is a dict, it should have the + following keys: + + - img: The value to pad the image. + - seg: The value to pad the semantic segmentation map. + Defaults to dict(img=0, seg=255). padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Defaults to 'constant'. 
@@ -321,7 +330,7 @@ class Pad(BaseTransform): size: Optional[Tuple[int, int]] = None, size_divisor: Optional[int] = None, pad_to_square: bool = False, - pad_val: Union[int, dict] = dict(img=0, seg=255), + pad_val: Union[Number, dict] = dict(img=0, seg=255), padding_mode: str = 'constant') -> None: self.size = size self.size_divisor = size_divisor @@ -938,7 +947,7 @@ class RandomMultiscaleResize(BaseTransform): self.resize_cfg = resize_cfg @staticmethod - def random_select(scales: List[Tuple]) -> Tuple[Number, int]: + def random_select(scales: List[Tuple]) -> Tuple[tuple, int]: """Randomly select an img_scale from given candidates. Args: diff --git a/tests/test_transforms/test_transforms_loading.py b/tests/test_transforms/test_transforms_loading.py index 3ee86a570..a7d3ad623 100644 --- a/tests/test_transforms/test_transforms_loading.py +++ b/tests/test_transforms/test_transforms_loading.py @@ -50,7 +50,7 @@ class TestLoadAnnotation: data_prefix = osp.join(osp.dirname(__file__), '../data') seg_map = osp.join(data_prefix, 'grayscale.jpg') cls.results = { - 'seg_map': + 'seg_map_path': seg_map, 'instances': [{ 'bbox': [0, 0, 10, 20], @@ -68,7 +68,7 @@ class TestLoadAnnotation: with_bbox=True, with_label=False, with_seg=False, - with_kps=False, + with_keypoints=False, ) results = transform(copy.deepcopy(self.results)) assert 'gt_bboxes' in results @@ -80,7 +80,7 @@ class TestLoadAnnotation: with_bbox=False, with_label=True, with_seg=False, - with_kps=False, + with_keypoints=False, ) results = transform(copy.deepcopy(self.results)) assert 'gt_bboxes_labels' in results @@ -91,7 +91,7 @@ class TestLoadAnnotation: with_bbox=False, with_label=False, with_seg=False, - with_kps=True, + with_keypoints=True, ) results = transform(copy.deepcopy(self.results)) assert 'gt_keypoints' in results @@ -103,20 +103,21 @@ class TestLoadAnnotation: with_bbox=False, with_label=False, with_seg=True, - with_kps=False, + with_keypoints=False, ) results = transform(copy.deepcopy(self.results)) 
- assert 'gt_semantic_seg' in results - assert results['gt_semantic_seg'].shape[:2] == (300, 400) + assert 'gt_seg_map' in results + assert results['gt_seg_map'].shape[:2] == (300, 400) def test_repr(self): transform = LoadAnnotation( with_bbox=True, with_label=False, with_seg=False, - with_kps=False, + with_keypoints=False, ) - assert repr(transform) == ('LoadAnnotation(with_bbox=True, ' - 'with_label=False, with_seg=False, ' - "with_kps=False, imdecode_backend='cv2', " - "file_client_args={'backend': 'disk'})") + assert repr(transform) == ( + 'LoadAnnotation(with_bbox=True, ' + 'with_label=False, with_seg=False, ' + "with_keypoints=False, imdecode_backend='cv2', " + "file_client_args={'backend': 'disk'})")