From e8d69cf2ff8a3a40fdb548872a044e1253a9987b Mon Sep 17 00:00:00 2001 From: yingfhu Date: Thu, 30 Jun 2022 09:15:15 +0000 Subject: [PATCH] [Fix] adjust default dataset path to empty string --- mmcls/datasets/base_dataset.py | 13 +++----- mmcls/datasets/cifar.py | 10 +++--- mmcls/datasets/cub.py | 2 +- mmcls/datasets/custom.py | 31 ++++++++----------- mmcls/datasets/imagenet.py | 32 +++++++++----------- mmcls/datasets/mnist.py | 10 +++--- mmcls/datasets/multi_label.py | 7 ++--- tests/test_data/test_datasets/test_common.py | 18 +++++------ 8 files changed, 56 insertions(+), 67 deletions(-) diff --git a/mmcls/datasets/base_dataset.py b/mmcls/datasets/base_dataset.py index dd0e9121..9e257726 100644 --- a/mmcls/datasets/base_dataset.py +++ b/mmcls/datasets/base_dataset.py @@ -38,10 +38,9 @@ class BaseDataset(_BaseDataset): ann_file (str): Annotation file path. metainfo (dict, optional): Meta information for dataset, such as class information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix`` and - ``ann_file``. Defaults to None. - data_prefix (str | dict, optional): Prefix for training data. Defaults - to None. + data_root (str): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to ''. + data_prefix (str | dict): Prefix for training data. Defaults to ''. filter_cfg (dict, optional): Config for filter data. Defaults to None. 
indices (int or Sequence[int], optional): Support using first few data in annotation file to facilitate training/testing on a smaller @@ -74,8 +73,8 @@ class BaseDataset(_BaseDataset): def __init__(self, ann_file: str, metainfo: Optional[dict] = None, - data_root: Optional[str] = None, - data_prefix: Union[str, dict, None] = None, + data_root: str = '', + data_prefix: Union[str, dict] = '', filter_cfg: Optional[dict] = None, indices: Optional[Union[int, Sequence[int]]] = None, serialize_data: bool = True, @@ -86,8 +85,6 @@ class BaseDataset(_BaseDataset): classes: Union[str, Sequence[str], None] = None): if isinstance(data_prefix, str): data_prefix = dict(img_path=expanduser(data_prefix)) - elif data_prefix is None: - data_prefix = dict(img_path=None) ann_file = expanduser(ann_file) metainfo = self._compat_classes(metainfo, classes) diff --git a/mmcls/datasets/cifar.py b/mmcls/datasets/cifar.py index 4ff6539b..da4fe3dd 100644 --- a/mmcls/datasets/cifar.py +++ b/mmcls/datasets/cifar.py @@ -25,8 +25,8 @@ class CIFAR10(BaseDataset): It determines to use the training set or test set. metainfo (dict, optional): Meta information for dataset, such as categories information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix``. - Defaults to None. + data_root (str): The root directory for ``data_prefix``. + Defaults to ''. download (bool): Whether to download the dataset if not exists. Defaults to True. **kwargs: Other keyword arguments in :class:`BaseDataset`. @@ -58,7 +58,7 @@ class CIFAR10(BaseDataset): data_prefix: str, test_mode: bool, metainfo: Optional[dict] = None, - data_root: Optional[str] = None, + data_root: str = '', download: bool = True, **kwargs): self.download = download @@ -181,8 +181,8 @@ class CIFAR100(CIFAR10): It determines to use the training set or test set. metainfo (dict, optional): Meta information for dataset, such as categories information. Defaults to None. 
- data_root (str, optional): The root directory for ``data_prefix``. - Defaults to None. + data_root (str): The root directory for ``data_prefix``. + Defaults to ''. download (bool): Whether to download the dataset if not exists. Defaults to True. **kwargs: Other keyword arguments in :class:`BaseDataset`. diff --git a/mmcls/datasets/cub.py b/mmcls/datasets/cub.py index a6c40009..2db4511c 100644 --- a/mmcls/datasets/cub.py +++ b/mmcls/datasets/cub.py @@ -41,7 +41,7 @@ class CUB(BaseDataset): to use the training set or test set. ann_file (str, optional): Annotation file path, path relative to ``data_root``. Defaults to 'images.txt'. - data_prefix (str, optional): Prefix for iamges, path relative to + data_prefix (str): Prefix for images, path relative to ``data_root``. Defaults to 'images'. image_class_labels_file (str, optional): The label file, path relative to ``data_root``. Defaults to 'image_class_labels.txt'. diff --git a/mmcls/datasets/custom.py b/mmcls/datasets/custom.py index 65f827f6..883eaff7 100644 --- a/mmcls/datasets/custom.py +++ b/mmcls/datasets/custom.py @@ -124,13 +124,12 @@ class CustomDataset(BaseDataset): first way, otherwise, try the second way. Args: - ann_file (str, optional): Annotation file path. Defaults to None. + ann_file (str): Annotation file path. Defaults to ''. metainfo (dict, optional): Meta information for dataset, such as class information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix`` and - ``ann_file``. Defaults to None. - data_prefix (str | dict, optional): Prefix for the data. Defaults - to None. + data_root (str): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to ''. + data_prefix (str | dict): Prefix for the data. Defaults to ''. extensions (Sequence[str]): A sequence of allowed extensions. Defaults to ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'). lazy_init (bool): Whether to load annotation during instantiation. 
@@ -142,16 +141,15 @@ class CustomDataset(BaseDataset): """ def __init__(self, - ann_file: Optional[str] = None, + ann_file: str = '', metainfo: Optional[dict] = None, - data_root: Optional[str] = None, - data_prefix: Union[str, dict, None] = None, + data_root: str = '', + data_prefix: Union[str, dict] = '', extensions: Sequence[str] = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'), lazy_init: bool = False, **kwargs): - assert (ann_file is not None or data_prefix is not None - or data_root is not None), \ + assert (ann_file or data_prefix or data_root), \ 'One of `ann_file`, `data_root` and `data_prefix` must '\ 'be specified.' @@ -159,7 +157,7 @@ class CustomDataset(BaseDataset): super().__init__( # The base class requires string ann_file but this class doesn't - ann_file=ann_file if ann_file is not None else '', + ann_file=ann_file, metainfo=metainfo, data_root=data_root, data_prefix=data_prefix, @@ -167,9 +165,6 @@ class CustomDataset(BaseDataset): lazy_init=True, **kwargs) - if ann_file is None: - self.ann_file = None - # Full initialize the dataset. if not lazy_init: self.full_init() @@ -210,17 +205,17 @@ class CustomDataset(BaseDataset): def load_data_list(self): """Load image paths and gt_labels.""" - if self.img_prefix is not None: + if self.img_prefix: file_client = FileClient.infer_client(uri=self.img_prefix) - if self.ann_file is None: + if not self.ann_file: samples = self._find_samples(file_client) else: lines = mmengine.list_from_file(self.ann_file) samples = [x.strip().rsplit(' ', 1) for x in lines] - def add_prefix(filename, prefix=None): - if prefix is None: + def add_prefix(filename, prefix=''): + if not prefix: return filename else: return file_client.join_path(prefix, filename) diff --git a/mmcls/datasets/imagenet.py b/mmcls/datasets/imagenet.py index ffe2dbf9..1100e9f2 100644 --- a/mmcls/datasets/imagenet.py +++ b/mmcls/datasets/imagenet.py @@ -15,13 +15,12 @@ class ImageNet(CustomDataset): found in :class:`CustomDataset`. 
Args: - ann_file (str, optional): Annotation file path. Defaults to None. + ann_file (str): Annotation file path. Defaults to ''. metainfo (dict, optional): Meta information for dataset, such as class information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix`` and - ``ann_file``. Defaults to None. - data_prefix (str | dict, optional): Prefix for training data. Defaults - to None. + data_root (str): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to ''. + data_prefix (str | dict): Prefix for training data. Defaults to ''. **kwargs: Other keyword arguments in :class:`CustomDataset` and :class:`BaseDataset`. """ # noqa: E501 @@ -30,10 +29,10 @@ class ImageNet(CustomDataset): METAINFO = {'classes': IMAGENET_CATEGORIES} def __init__(self, - ann_file: Optional[str] = None, + ann_file: str = '', metainfo: Optional[dict] = None, - data_root: Optional[str] = None, - data_prefix: Union[str, dict, None] = None, + data_root: str = '', + data_prefix: Union[str, dict] = '', **kwargs): kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs} super().__init__( @@ -53,13 +52,12 @@ class ImageNet21k(CustomDataset): specify it from the ``classes`` argument. Args: - ann_file (str, optional): Annotation file path. Defaults to None. + ann_file (str): Annotation file path. Defaults to ''. metainfo (dict, optional): Meta information for dataset, such as class information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix`` and - ``ann_file``. Defaults to None. - data_prefix (str | dict, optional): Prefix for training data. Defaults - to None. + data_root (str): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to ''. + data_prefix (str | dict): Prefix for training data. Defaults to ''. multi_label (bool): Not implement by now. Use multi label or not. Defaults to False. 
**kwargs: Other keyword arguments in :class:`CustomDataset` and @@ -69,10 +67,10 @@ class ImageNet21k(CustomDataset): IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif') def __init__(self, - ann_file: Optional[str] = None, + ann_file: str = '', metainfo: Optional[dict] = None, - data_root: Optional[str] = None, - data_prefix: Union[str, dict, None] = None, + data_root: str = '', + data_prefix: Union[str, dict] = '', multi_label: bool = False, **kwargs): if multi_label: @@ -82,7 +80,7 @@ class ImageNet21k(CustomDataset): logger = get_root_logger() - if ann_file is None: + if not ann_file: logger.warning( 'The ImageNet21k dataset is large, and scanning directory may ' 'consume long time. Considering to specify the `ann_file` to ' diff --git a/mmcls/datasets/mnist.py b/mmcls/datasets/mnist.py index 22a60934..4250bfb1 100644 --- a/mmcls/datasets/mnist.py +++ b/mmcls/datasets/mnist.py @@ -27,8 +27,8 @@ class MNIST(BaseDataset): It determines to use the training set or test set. metainfo (dict, optional): Meta information for dataset, such as categories information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix``. - Defaults to None. + data_root (str): The root directory for ``data_prefix``. + Defaults to ''. download (bool): Whether to download the dataset if not exists. Defaults to True. **kwargs: Other keyword arguments in :class:`BaseDataset`. @@ -51,7 +51,7 @@ class MNIST(BaseDataset): data_prefix: str, test_mode: bool, metainfo: Optional[dict] = None, - data_root: Optional[str] = None, + data_root: str = '', download: bool = True, **kwargs): self.download = download @@ -148,8 +148,8 @@ class FashionMNIST(MNIST): It determines to use the training set or test set. metainfo (dict, optional): Meta information for dataset, such as categories information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix``. - Defaults to None. 
+ data_root (str): The root directory for ``data_prefix``. + Defaults to ''. download (bool): Whether to download the dataset if not exists. Defaults to True. **kwargs: Other keyword arguments in :class:`BaseDataset`. diff --git a/mmcls/datasets/multi_label.py b/mmcls/datasets/multi_label.py index 50f3ddff..59ec2221 100644 --- a/mmcls/datasets/multi_label.py +++ b/mmcls/datasets/multi_label.py @@ -42,10 +42,9 @@ class MultiLabelDataset(BaseDataset): ann_file (str): Annotation file path. metainfo (dict, optional): Meta information for dataset, such as class information. Defaults to None. - data_root (str, optional): The root directory for ``data_prefix`` and - ``ann_file``. Defaults to None. - data_prefix (str | dict, optional): Prefix for training data. Defaults - to None. + data_root (str): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to ''. + data_prefix (str | dict): Prefix for training data. Defaults to ''. filter_cfg (dict, optional): Config for filter data. Defaults to None. 
indices (int or Sequence[int], optional): Support using first few data in annotation file to facilitate training/testing on a smaller diff --git a/tests/test_data/test_datasets/test_common.py b/tests/test_data/test_datasets/test_common.py index 3838deec..a6f0e53e 100644 --- a/tests/test_data/test_datasets/test_common.py +++ b/tests/test_data/test_datasets/test_common.py @@ -134,7 +134,7 @@ class TestCustomDataset(TestBaseDataset): cfg = { **self.DEFAULT_ARGS, 'data_prefix': ASSETS_ROOT, - 'ann_file': None, + 'ann_file': '', } dataset = dataset_class(**cfg) self.assertEqual(len(dataset), 3) @@ -184,8 +184,8 @@ class TestCustomDataset(TestBaseDataset): # test load with absolute ann_file cfg = { **self.DEFAULT_ARGS, - 'data_root': None, - 'data_prefix': None, + 'data_root': '', + 'data_prefix': '', 'ann_file': osp.join(ASSETS_ROOT, 'ann.txt'), } dataset = dataset_class(**cfg) @@ -206,7 +206,7 @@ class TestCustomDataset(TestBaseDataset): # test extensions filter cfg = { **self.DEFAULT_ARGS, 'data_prefix': dict(img_path=ASSETS_ROOT), - 'ann_file': None, + 'ann_file': '', 'extensions': ('.txt', ) } with self.assertRaisesRegex(RuntimeError, @@ -215,7 +215,7 @@ class TestCustomDataset(TestBaseDataset): cfg = { **self.DEFAULT_ARGS, 'data_prefix': ASSETS_ROOT, - 'ann_file': None, + 'ann_file': '', 'extensions': ('.jpeg', ) } with self.assertLogs(mmcls_logger, 'WARN') as log: @@ -233,7 +233,7 @@ class TestCustomDataset(TestBaseDataset): **self.DEFAULT_ARGS, 'data_prefix': ASSETS_ROOT, 'classes': ('apple', 'banana'), - 'ann_file': None, + 'ann_file': '', } dataset = dataset_class(**cfg) self.assertEqual(dataset.CLASSES, ('apple', 'banana')) @@ -256,7 +256,7 @@ class TestImageNet(TestCustomDataset): cfg = { **self.DEFAULT_ARGS, 'data_prefix': ASSETS_ROOT, - 'ann_file': None, + 'ann_file': '', } with self.assertRaisesRegex( AssertionError, r"\(2\) doesn't match .* classes \(1000\)"): @@ -267,7 +267,7 @@ class TestImageNet(TestCustomDataset): **self.DEFAULT_ARGS, 'data_prefix': 
ASSETS_ROOT, 'classes': ['cat', 'dog'], - 'ann_file': None, + 'ann_file': '', } dataset = dataset_class(**cfg) self.assertEqual(len(dataset), 3) @@ -290,7 +290,7 @@ class TestImageNet21k(TestCustomDataset): dataset_class(**cfg) # Warn about ann_file - cfg = {**self.DEFAULT_ARGS, 'ann_file': None} + cfg = {**self.DEFAULT_ARGS, 'ann_file': ''} with self.assertLogs(mmcls_logger, 'WARN') as log: dataset_class(**cfg) self.assertIn('specify the `ann_file`', log.output[0])