[Fix] fix some typo in transforms (#1831)

* [Fix] fix some typo according to review

* fix comment
pull/2133/head
liukuikun 2022-03-29 11:34:12 +08:00 committed by zhouzaida
parent e2ca0733c6
commit e7592a70f2
3 changed files with 46 additions and 36 deletions

View File

@ -111,7 +111,7 @@ class LoadAnnotation(BaseTransform):
}
]
# Filename of semantic or panoptic segmentation ground truth file.
'seg_map': 'a/b/c'
'seg_map_path': 'a/b/c'
}
After this module, the annotation has been changed to the format below:
@ -125,7 +125,7 @@ class LoadAnnotation(BaseTransform):
# In int type.
'gt_bboxes_labels': np.ndarray(N, )
# In uint8 type.
'gt_semantic_seg': np.ndarray (H, W)
'gt_seg_map': np.ndarray (H, W)
# in (x, y, v) order, float type.
'gt_keypoints': np.ndarray(N, NK, 3)
}
@ -138,13 +138,13 @@ class LoadAnnotation(BaseTransform):
- bbox_label
- keypoints (optional)
- seg_map (optional)
- seg_map_path (optional)
Added Keys:
- gt_bboxes
- gt_bboxes_labels
- gt_semantic_seg
- gt_seg_map
- gt_keypoints
Args:
@ -154,8 +154,8 @@ class LoadAnnotation(BaseTransform):
Defaults to True.
with_seg (bool): Whether to parse and load the semantic segmentation
annotation. Defaults to False.
with_kps (bool): Whether to parse and load the keypoints annotation.
Defaults to False.
with_keypoints (bool): Whether to parse and load the keypoints
annotation. Defaults to False.
imdecode_backend (str): The image decoding backend type. The backend
argument for :func:``mmcv.imfrombytes``.
See :func:``mmcv.imfrombytes`` for details.
@ -170,7 +170,7 @@ class LoadAnnotation(BaseTransform):
with_bbox: bool = True,
with_label: bool = True,
with_seg: bool = False,
with_kps: bool = False,
with_keypoints: bool = False,
imdecode_backend: str = 'cv2',
file_client_args: dict = dict(backend='disk')
) -> None:
@ -178,7 +178,7 @@ class LoadAnnotation(BaseTransform):
self.with_bbox = with_bbox
self.with_label = with_label
self.with_seg = with_seg
self.with_kps = with_kps
self.with_keypoints = with_keypoints
self.imdecode_backend = imdecode_backend
self.file_client_args = file_client_args.copy()
self.file_client = mmcv.FileClient(**self.file_client_args)
@ -210,7 +210,7 @@ class LoadAnnotation(BaseTransform):
gt_bboxes_labels.append(instance['bbox_label'])
results['gt_bboxes_labels'] = np.array(gt_bboxes_labels)
def _load_semantic_seg(self, results: dict) -> None:
def _load_seg_map(self, results: dict) -> None:
"""Private function to load semantic segmentation annotations.
Args:
@ -220,8 +220,8 @@ class LoadAnnotation(BaseTransform):
dict: The dict contains loaded semantic segmentation annotations.
"""
img_bytes = self.file_client.get(results['seg_map'])
results['gt_semantic_seg'] = mmcv.imfrombytes(
img_bytes = self.file_client.get(results['seg_map_path'])
results['gt_seg_map'] = mmcv.imfrombytes(
img_bytes, flag='unchanged',
backend=self.imdecode_backend).squeeze()
@ -255,8 +255,8 @@ class LoadAnnotation(BaseTransform):
if self.with_label:
self._load_labels(results)
if self.with_seg:
self._load_semantic_seg(results)
if self.with_kps:
self._load_seg_map(results)
if self.with_keypoints:
self._load_kps(results)
return results
@ -265,7 +265,7 @@ class LoadAnnotation(BaseTransform):
repr_str += f'(with_bbox={self.with_bbox}, '
repr_str += f'with_label={self.with_label}, '
repr_str += f'with_seg={self.with_seg}, '
repr_str += f'with_kps={self.with_kps}, '
repr_str += f'with_keypoints={self.with_keypoints}, '
repr_str += f"imdecode_backend='{self.imdecode_backend}', "
repr_str += f'file_client_args={self.file_client_args})'
return repr_str

View File

@ -23,6 +23,10 @@ class Normalize(BaseTransform):
- img
Modified Keys:
- img
Added Keys:
- img_norm_cfg
@ -38,12 +42,12 @@ class Normalize(BaseTransform):
to_rgb (bool): Whether to convert the image from BGR to RGB before
normalizing the image. If ``to_rgb=True``, the order of mean and std
should be RGB. If ``to_rgb=False``, the order of mean and std
should be BGR. Defaults to True.
should be the same order of the image. Defaults to True.
"""
def __init__(self,
mean: Sequence[float],
std: Sequence[float],
mean: Sequence[Number],
std: Sequence[Number],
to_rgb: bool = True) -> None:
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
@ -120,7 +124,7 @@ class Resize(BaseTransform):
interpolation (str): Interpolation method, accepted values are
"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
backend, "nearest", "bilinear" for 'pillow' backend. Defaults
to 'cv2'.
to 'bilinear'.
"""
def __init__(self,
@ -275,7 +279,6 @@ class Pad(BaseTransform):
Required Keys:
- img
- gt_bboxes (optional)
- gt_semantic_seg (optional)
Modified Keys:
@ -298,9 +301,15 @@ class Pad(BaseTransform):
None.
pad_to_square (bool): Whether to pad the image into a square.
Currently only used for YOLOX. Defaults to False.
pad_val (int or dict): A dict for padding value.
if ``type(pad_val) == int``, the val to pad seg is 255. Defaults to
``dict(img=0, seg=255)``.
pad_val (Number | dict[str, Number], optional): Padding value used when
``padding_mode`` is 'constant'. If it is a single number, the value
to pad the image is the number and to pad the semantic
segmentation map is 255. If it is a dict, it should have the
following keys:
- img: The value to pad the image.
- seg: The value to pad the semantic segmentation map.
Defaults to dict(img=0, seg=255).
padding_mode (str): Type of padding. Should be: constant, edge,
reflect or symmetric. Defaults to 'constant'.
@ -321,7 +330,7 @@ class Pad(BaseTransform):
size: Optional[Tuple[int, int]] = None,
size_divisor: Optional[int] = None,
pad_to_square: bool = False,
pad_val: Union[int, dict] = dict(img=0, seg=255),
pad_val: Union[Number, dict] = dict(img=0, seg=255),
padding_mode: str = 'constant') -> None:
self.size = size
self.size_divisor = size_divisor
@ -938,7 +947,7 @@ class RandomMultiscaleResize(BaseTransform):
self.resize_cfg = resize_cfg
@staticmethod
def random_select(scales: List[Tuple]) -> Tuple[Number, int]:
def random_select(scales: List[Tuple]) -> Tuple[tuple, int]:
"""Randomly select an img_scale from given candidates.
Args:

View File

@ -50,7 +50,7 @@ class TestLoadAnnotation:
data_prefix = osp.join(osp.dirname(__file__), '../data')
seg_map = osp.join(data_prefix, 'grayscale.jpg')
cls.results = {
'seg_map':
'seg_map_path':
seg_map,
'instances': [{
'bbox': [0, 0, 10, 20],
@ -68,7 +68,7 @@ class TestLoadAnnotation:
with_bbox=True,
with_label=False,
with_seg=False,
with_kps=False,
with_keypoints=False,
)
results = transform(copy.deepcopy(self.results))
assert 'gt_bboxes' in results
@ -80,7 +80,7 @@ class TestLoadAnnotation:
with_bbox=False,
with_label=True,
with_seg=False,
with_kps=False,
with_keypoints=False,
)
results = transform(copy.deepcopy(self.results))
assert 'gt_bboxes_labels' in results
@ -91,7 +91,7 @@ class TestLoadAnnotation:
with_bbox=False,
with_label=False,
with_seg=False,
with_kps=True,
with_keypoints=True,
)
results = transform(copy.deepcopy(self.results))
assert 'gt_keypoints' in results
@ -103,20 +103,21 @@ class TestLoadAnnotation:
with_bbox=False,
with_label=False,
with_seg=True,
with_kps=False,
with_keypoints=False,
)
results = transform(copy.deepcopy(self.results))
assert 'gt_semantic_seg' in results
assert results['gt_semantic_seg'].shape[:2] == (300, 400)
assert 'gt_seg_map' in results
assert results['gt_seg_map'].shape[:2] == (300, 400)
def test_repr(self):
transform = LoadAnnotation(
with_bbox=True,
with_label=False,
with_seg=False,
with_kps=False,
with_keypoints=False,
)
assert repr(transform) == ('LoadAnnotation(with_bbox=True, '
'with_label=False, with_seg=False, '
"with_kps=False, imdecode_backend='cv2', "
"file_client_args={'backend': 'disk'})")
assert repr(transform) == (
'LoadAnnotation(with_bbox=True, '
'with_label=False, with_seg=False, '
"with_keypoints=False, imdecode_backend='cv2', "
"file_client_args={'backend': 'disk'})")