[Refactor] Deprecate imgs_per_gpu and use samples_per_gpu (#204)

* [Refactor] change imgs_per_gpu to samples_per_gpu in config files * [Docs] change imgs_per_gpu to samples_per_gpu in docs * [Refactor] change imgs_per_gpu to samples_per_gpu in codes and add warnings * [Fix] fix isort * [Docs] fix docs format * [Refactor] add related UT codes * [Fix] fix isort
2025-06-03 14:59:38 +08:00 · 2022-02-09 17:45:41 +08:00 · 2022-02-09 17:45:41 +08:00 · af331b043f
commit af331b043f
parent 16b3f7b61e
41 changed files with 168 additions and 60 deletions
--- a/configs/benchmarks/classification/_base_/datasets/cifar10.py
+++ b/configs/benchmarks/classification/_base_/datasets/cifar10.py
@ -20,7 +20,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=128,
+    samples_per_gpu=128,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
--- a/configs/benchmarks/classification/_base_/datasets/imagenet.py
+++ b/configs/benchmarks/classification/_base_/datasets/imagenet.py
@ -23,7 +23,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
+    samples_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/benchmarks/classification/_base_/datasets/inaturalist2018.py
+++ b/configs/benchmarks/classification/_base_/datasets/inaturalist2018.py
@ -23,7 +23,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
+    samples_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/benchmarks/classification/_base_/datasets/places205.py
+++ b/configs/benchmarks/classification/_base_/datasets/places205.py
@ -25,7 +25,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32x8=256, 8GPU linear cls
+    samples_per_gpu=32,  # total 32x8=256, 8GPU linear cls
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
@ -10,7 +10,7 @@ model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))

 # dataset settings
 data = dict(
-    imgs_per_gpu=64,  # total 64x4=256
+    samples_per_gpu=64,  # total 64x4=256
    train=dict(
        data_source=dict(ann_file='data/imagenet/meta/train_10pct.txt')))

--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
@ -10,7 +10,7 @@ model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))

 # dataset settings
 data = dict(
-    imgs_per_gpu=64,  # total 64x4=256
+    samples_per_gpu=64,  # total 64x4=256
    train=dict(
        data_source=dict(ann_file='data/imagenet/meta/train_1percent.txt')))

--- a/configs/benchmarks/classification/imagenet/resnet50_8xb512-coslr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_8xb512-coslr-90e_in1k.py
@ -8,7 +8,7 @@ _base_ = [
 model = dict(backbone=dict(frozen_stages=4))

 # dataset summary
-data = dict(imgs_per_gpu=512)  # total 512*8=4096, 8GPU linear cls
+data = dict(samples_per_gpu=512)  # total 512*8=4096, 8GPU linear cls

 # simsiam setting
 # runtime settings
--- a/configs/benchmarks/classification/imagenet/vit-small-p16_8xb128-coslr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/vit-small-p16_8xb128-coslr-90e_in1k.py
@ -9,7 +9,7 @@ _base_ = [
 model = dict(backbone=dict(frozen_stages=12, norm_eval=True))

 # dataset summary
-data = dict(imgs_per_gpu=128)  # total 128*8=1024, 8 GPU linear cls
+data = dict(samples_per_gpu=128)  # total 128*8=1024, 8 GPU linear cls

 # optimizer
 optimizer = dict(type='SGD', lr=12, momentum=0.9, weight_decay=0.)
--- a/configs/benchmarks/classification/svm_voc07.py
+++ b/configs/benchmarks/classification/svm_voc07.py
@ -5,7 +5,7 @@ split_name = ['voc07_trainval', 'voc07_test']
 img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

 data = dict(
-    imgs_per_gpu=32,
+    samples_per_gpu=32,
    workers_per_gpu=4,
    extract=dict(
        type=dataset_type,
--- a/configs/benchmarks/classification/tsne_imagenet.py
+++ b/configs/benchmarks/classification/tsne_imagenet.py
@ -4,7 +4,7 @@ name = 'imagenet_val'
 img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

 data = dict(
-    imgs_per_gpu=8,
+    samples_per_gpu=8,
    workers_per_gpu=4,
    extract=dict(
        type='SingleViewDataset',
--- a/configs/selfsup/_base_/datasets/imagenet_byol.py
+++ b/configs/selfsup/_base_/datasets/imagenet_byol.py
@ -51,7 +51,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32*8(gpu)=256
+    samples_per_gpu=32,  # total 32*8(gpu)=256
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_deepcluster.py
+++ b/configs/selfsup/_base_/datasets/imagenet_deepcluster.py
@ -31,7 +31,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=64,  # 64*8
+    samples_per_gpu=64,  # 64*8
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
@ -49,7 +49,7 @@ custom_hooks = [
    dict(
        type='DeepClusterHook',
        extractor=dict(
-            imgs_per_gpu=128,
+            samples_per_gpu=128,
            workers_per_gpu=8,
            dataset=dict(
                type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_mocov1.py
+++ b/configs/selfsup/_base_/datasets/imagenet_mocov1.py
@ -23,7 +23,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32*8=256
+    samples_per_gpu=32,  # total 32*8=256
    workers_per_gpu=4,
    drop_last=True,
    train=dict(
--- a/configs/selfsup/_base_/datasets/imagenet_mocov2.py
+++ b/configs/selfsup/_base_/datasets/imagenet_mocov2.py
@ -30,7 +30,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32*8=256
+    samples_per_gpu=32,  # total 32*8=256
    workers_per_gpu=4,
    drop_last=True,
    train=dict(
--- a/configs/selfsup/_base_/datasets/imagenet_mocov3.py
+++ b/configs/selfsup/_base_/datasets/imagenet_mocov3.py
@ -51,7 +51,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=256,  # 256*16(gpu)=4096
+    samples_per_gpu=256,  # 256*16(gpu)=4096
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_npid.py
+++ b/configs/selfsup/_base_/datasets/imagenet_npid.py
@ -23,7 +23,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32*8
+    samples_per_gpu=32,  # total 32*8
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_odc.py
+++ b/configs/selfsup/_base_/datasets/imagenet_odc.py
@ -31,7 +31,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=64,  # 64*8
+    samples_per_gpu=64,  # 64*8
    sampling_replace=True,
    workers_per_gpu=4,
    train=dict(
@ -50,7 +50,7 @@ custom_hooks = [
    dict(
        type='DeepClusterHook',
        extractor=dict(
-            imgs_per_gpu=128,
+            samples_per_gpu=128,
            workers_per_gpu=8,
            dataset=dict(
                type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_relative-loc.py
+++ b/configs/selfsup/_base_/datasets/imagenet_relative-loc.py
@ -21,7 +21,7 @@ prefetch = False

 # dataset summary
 data = dict(
-    imgs_per_gpu=64,  # 64 x 8 = 512
+    samples_per_gpu=64,  # 64 x 8 = 512
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_rotation-pred.py
+++ b/configs/selfsup/_base_/datasets/imagenet_rotation-pred.py
@ -23,7 +23,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=16,  # (16*4) x 8 = 512
+    samples_per_gpu=16,  # (16*4) x 8 = 512
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_simclr.py
+++ b/configs/selfsup/_base_/datasets/imagenet_simclr.py
@ -29,7 +29,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32*8
+    samples_per_gpu=32,  # total 32*8
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
--- a/configs/selfsup/_base_/datasets/imagenet_swav_mcrop-2-6.py
+++ b/configs/selfsup/_base_/datasets/imagenet_swav_mcrop-2-6.py
@ -51,7 +51,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # total 32*8=256
+    samples_per_gpu=32,  # total 32*8=256
    workers_per_gpu=4,
    drop_last=True,
    train=dict(
--- a/configs/selfsup/byol/byol_resnet50_8xb256-fp16-accum2-coslr-200e_in1k.py
+++ b/configs/selfsup/byol/byol_resnet50_8xb256-fp16-accum2-coslr-200e_in1k.py
@ -6,7 +6,7 @@ _base_ = [
 ]

 # dataset summary
-data = dict(imgs_per_gpu=256)
+data = dict(samples_per_gpu=256)

 # additional hooks
 # interval for accumulate gradient, total 8*256*2(interval)=4096
--- a/configs/selfsup/mocov3/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224.py
+++ b/configs/selfsup/mocov3/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224.py
@ -58,7 +58,8 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=128, train=dict(pipelines=[train_pipeline1, train_pipeline2]))
+    samples_per_gpu=128,
+    train=dict(pipelines=[train_pipeline1, train_pipeline2]))

 # MoCo v3 use the same momentum update method as BYOL
 custom_hooks = [dict(type='MomentumUpdateHook')]
--- a/configs/selfsup/simclr/simclr_resnet50_8xb64-coslr-200e_in1k.py
+++ b/configs/selfsup/simclr/simclr_resnet50_8xb64-coslr-200e_in1k.py
@ -1,4 +1,4 @@
 _base_ = 'simclr_resnet50_8xb32-coslr-200e_in1k.py'

 # dataset summary
-data = dict(imgs_per_gpu=64)  # total 64*8
+data = dict(samples_per_gpu=64)  # total 64*8
--- a/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py
+++ b/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py
@ -13,7 +13,7 @@ custom_hooks = [
    dict(
        type='SwAVHook',
        priority='VERY_HIGH',
-        batch_size={{_base_.data.imgs_per_gpu}},
+        batch_size={{_base_.data.samples_per_gpu}},
        epoch_queue_starts=15,
        crops_for_assign=[0, 1],
        feat_dim=128,
--- a/docs/en/tutorials/0_config.md
+++ b/docs/en/tutorials/0_config.md
@ -207,7 +207,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # Batch size of a single GPU, total 32*8=256
+    samples_per_gpu=32,  # Batch size of a single GPU, total 32*8=256
    workers_per_gpu=4,  # Worker to pre-fetch data for each single GPU
    drop_last=True,  # Whether to drop the last batch of data
    train=dict(
@ -304,7 +304,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,
+    samples_per_gpu=32,
    workers_per_gpu=4,
    drop_last=True,
    train=dict(type=dataset_type, type=data_source, data_prefix=...),
--- a/docs/en/tutorials/4_schedule.md
+++ b/docs/en/tutorials/4_schedule.md
@ -154,14 +154,14 @@ When there is not enough computation resource, the batch size can only be set to
 Here is an example:

 ```python
-data = dict(imgs_per_gpu=64)
+data = dict(samples_per_gpu=64)
 optimizer_config = dict(type="DistOptimizerHook", update_interval=4)
 ```

 Indicates that during training, back-propagation is performed every 4 iters. And the above is equivalent to:

 ```python
-data = dict(imgs_per_gpu=256)
+data = dict(samples_per_gpu=256)
 optimizer_config = dict(type="OptimizerHook")
 ```

--- a/docs/en/tutorials/6_benchmarks.md
+++ b/docs/en/tutorials/6_benchmarks.md
@ -70,7 +70,7 @@ bash tools/benchmarks/classification/slurm_train_linear.sh ${PARTITION} ${JOB_NA
 ```

 Remarks:
- The default GPU number is 8. When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
+- The default GPU number is 8. When changing GPUS, please also change `samples_per_gpu` in the config file accordingly to ensure the total batch size is 256.
 - `CONFIG`: Use config files under `configs/benchmarks/classification/`, excluding svm_voc07.py and tsne_imagenet.py and imagenet_*percent folders.
 - `PRETRAIN`: the pretrained model file.

--- a/docs/zh_cn/tutorials/0_config.md
+++ b/docs/zh_cn/tutorials/0_config.md
@ -207,7 +207,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,  # Batch size of a single GPU, total 32*8=256
+    samples_per_gpu=32,  # Batch size of a single GPU, total 32*8=256
    workers_per_gpu=4,  # Worker to pre-fetch data for each single GPU
    drop_last=True,  # Whether to drop the last batch of data
    train=dict(
@ -304,7 +304,7 @@ if not prefetch:

 # dataset summary
 data = dict(
-    imgs_per_gpu=32,
+    samples_per_gpu=32,
    workers_per_gpu=4,
    drop_last=True,
    train=dict(type=dataset_type, type=data_source, data_prefix=...),
--- a/docs/zh_cn/tutorials/4_schedule.md
+++ b/docs/zh_cn/tutorials/4_schedule.md
@ -154,14 +154,14 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 用例如下：

 ```python
-data = dict(imgs_per_gpu=64)
+data = dict(samples_per_gpu=64)
 optimizer_config = dict(type="DistOptimizerHook", update_interval=4)
 ```

 表示训练时，每 4 个 iter 执行一次反向传播。由于此时单张 GPU 上的批次大小为 64，也就等价于单张 GPU 上一次迭代的批次大小为 256，也即：

 ```python
-data = dict(imgs_per_gpu=256)
+data = dict(samples_per_gpu=256)
 optimizer_config = dict(type="OptimizerHook")
 ```

--- a/docs/zh_cn/tutorials/6_benchmarks.md
+++ b/docs/zh_cn/tutorials/6_benchmarks.md
@ -70,7 +70,7 @@ bash tools/benchmarks/classification/slurm_train_linear.sh ${PARTITION} ${JOB_NA
 ```

 Remarks:
- The default GPU number is 8. When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
+- The default GPU number is 8. When changing GPUS, please also change `samples_per_gpu` in the config file accordingly to ensure the total batch size is 256.
 - `CONFIG`: Use config files under `configs/benchmarks/classification/`, excluding svm_voc07.py and tsne_imagenet.py and imagenet_*percent folders.
 - `PRETRAIN`: the pretrained model file.

--- a/mmselfsup/apis/train.py
+++ b/mmselfsup/apis/train.py
@ -75,13 +75,26 @@ def train_model(model,

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+    if 'imgs_per_gpu' in cfg.data:
+        logger.warning('"imgs_per_gpu" is deprecated. '
+                       'Please use "samples_per_gpu" instead')
+        if 'samples_per_gpu' in cfg.data:
+            logger.warning(
+                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
+                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
+                f'={cfg.data.imgs_per_gpu} is used in this experiments')
+        else:
+            logger.warning(
+                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
+                f'{cfg.data.imgs_per_gpu} in this experiments')
+        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
-            cfg.data.imgs_per_gpu,
-            cfg.data.workers_per_gpu,
-            # cfg.gpus will be ignored if distributed
+            samples_per_gpu=cfg.data.samples_per_gpu,
+            workers_per_gpu=cfg.data.workers_per_gpu,
+            # `num_gpus` will be ignored if distributed
            num_gpus=len(cfg.gpu_ids),
            dist=distributed,
            replace=getattr(cfg.data, 'sampling_replace', False),
@ -161,7 +174,7 @@ def train_model(model,
        val_dataset = build_dataset(cfg.data.val)
        val_dataloader = build_dataloader(
            val_dataset,
-            imgs_per_gpu=cfg.data.imgs_per_gpu,
+            samples_per_gpu=cfg.data.samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False,
--- a/mmselfsup/core/hooks/deepcluster_hook.py
+++ b/mmselfsup/core/hooks/deepcluster_hook.py
@ -7,6 +7,7 @@ from mmcv.utils import print_log

 from mmselfsup.utils import Extractor
 from mmselfsup.utils import clustering as _clustering
+from mmselfsup.utils import get_root_logger


@HOOKS.register_module()
@ -41,6 +42,23 @@ class DeepClusterHook(Hook):
            interval=1,
            dist_mode=True,
            data_loaders=None):
+
+        logger = get_root_logger()
+        if 'imgs_per_gpu' in extractor:
+            logger.warning('"imgs_per_gpu" is deprecated. '
+                           'Please use "samples_per_gpu" instead')
+            if 'samples_per_gpu' in extractor:
+                logger.warning(
+                    f'Got "imgs_per_gpu"={extractor["imgs_per_gpu"]} and '
+                    f'"samples_per_gpu"={extractor["samples_per_gpu"]}, '
+                    f'"imgs_per_gpu"={extractor["imgs_per_gpu"]} is used in '
+                    f'this experiments')
+            else:
+                logger.warning(
+                    'Automatically set "samples_per_gpu"="imgs_per_gpu"='
+                    f'{extractor["imgs_per_gpu"]} in this experiments')
+            extractor['samples_per_gpu'] = extractor['imgs_per_gpu']
+
        self.extractor = Extractor(dist_mode=dist_mode, **extractor)
        self.clustering_type = clustering.pop('type')
        self.clustering_cfg = clustering
--- a/mmselfsup/datasets/builder.py
+++ b/mmselfsup/datasets/builder.py
@ -1,6 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import platform
 import random
+import warnings
 from functools import partial

 import numpy as np
@ -45,8 +46,9 @@ def build_dataset(cfg, default_args=None):


 def build_dataloader(dataset,
-                     imgs_per_gpu,
-                     workers_per_gpu,
+                     imgs_per_gpu=None,
+                     samples_per_gpu=None,
+                     workers_per_gpu=1,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
@ -62,10 +64,13 @@ def build_dataloader(dataset,

    Args:
        dataset (Dataset): A PyTorch dataset.
-        imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of
-            each GPU.
+        imgs_per_gpu (int): (Deprecated, please use samples_per_gpu) Number of
+            images on each GPU, i.e., batch size of each GPU. Defaults to None.
+        samples_per_gpu (int): Number of images on each GPU, i.e., batch size
+            of each GPU. Defaults to None.
        workers_per_gpu (int): How many subprocesses to use for data loading
-            for each GPU.
+            for each GPU. `persistent_workers` option needs num_workers > 0.
+            Defaults to 1.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Defaults to True.
        shuffle (bool): Whether to shuffle the data at every epoch.
@ -85,18 +90,33 @@ def build_dataloader(dataset,
    Returns:
        DataLoader: A PyTorch dataloader.
    """
+    if imgs_per_gpu is None and samples_per_gpu is None:
+        raise ValueError(
+            'Please inidcate number of images on each GPU, ',
+            '"imgs_per_gpu" and "samples_per_gpu" can not be "None" at the ',
+            'same time. "imgs_per_gpu" is deprecated, please use ',
+            '"samples_per_gpu".')
+
+    if imgs_per_gpu is not None:
+        warnings.warn(f'Got "imgs_per_gpu"={imgs_per_gpu} and '
+                      f'"samples_per_gpu"={samples_per_gpu}, "imgs_per_gpu"'
+                      f'={imgs_per_gpu} is used in this experiments. '
+                      'Automatically set "samples_per_gpu"="imgs_per_gpu"='
+                      f'{imgs_per_gpu} in this experiments')
+        samples_per_gpu = imgs_per_gpu
+
    rank, world_size = get_dist_info()
    if dist:
        sampler = DistributedSampler(
            dataset, world_size, rank, shuffle=shuffle, replace=replace)
        shuffle = False
-        batch_size = imgs_per_gpu
+        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        if replace:
            return NotImplemented
        sampler = None  # TODO: set replace
-        batch_size = num_gpus * imgs_per_gpu
+        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    init_fn = partial(
@ -117,7 +137,7 @@ def build_dataloader(dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
-        collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
+        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=pin_memory,
        shuffle=shuffle,
        worker_init_fn=init_fn,
--- a/mmselfsup/utils/extractor.py
+++ b/mmselfsup/utils/extractor.py
@ -11,8 +11,8 @@ class Extractor(object):
    Args:
        dataset (Dataset | dict): A PyTorch dataset or dict that indicates
            the dataset.
-        imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of
-            each GPU.
+        samples_per_gpu (int): Number of images on each GPU, i.e., batch size
+            of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        dist_mode (bool): Use distributed extraction or not. Defaults to False.
@ -25,7 +25,7 @@ class Extractor(object):

    def __init__(self,
                 dataset,
-                 imgs_per_gpu,
+                 samples_per_gpu,
                 workers_per_gpu,
                 dist_mode=False,
                 persistent_workers=True,
@ -40,8 +40,8 @@ class Extractor(object):
                            f'not {type(dataset)}')
        self.data_loader = datasets.build_dataloader(
            self.dataset,
-            imgs_per_gpu,
-            workers_per_gpu,
+            samples_per_gpu=samples_per_gpu,
+            workers_per_gpu=workers_per_gpu,
            dist=dist_mode,
            shuffle=False,
            persistent_workers=persistent_workers,
--- a/tests/test_data/test_datasets/test_builder.py
+++ b/tests/test_data/test_datasets/test_builder.py
@ -3,7 +3,7 @@ from unittest.mock import ANY
 import pytest

 from mmselfsup.datasets import (ConcatDataset, DeepClusterDataset,
-                                RepeatDataset, build_dataset)
+                                RepeatDataset, build_dataloader, build_dataset)

 DATASET_CONFIG = dict(
    type='DeepClusterDataset',
@ -50,3 +50,19 @@ DATASET_CONFIG = dict(
 ])
 def test_build_dataset(cfg, expected_type):
    assert isinstance(build_dataset(cfg), expected_type)
+
+
+def test_build_dataloader():
+    dataset = build_dataset(DATASET_CONFIG)
+
+    with pytest.raises(ValueError):
+        data_loader = build_dataloader(dataset)
+
+    data_loader = build_dataloader(
+        dataset,
+        imgs_per_gpu=1,
+        samples_per_gpu=None,
+        dist=False,
+    )
+    assert len(data_loader) == 2
+    assert data_loader.batch_size == 1
--- a/tools/analysis_tools/visualize_tsne.py
+++ b/tools/analysis_tools/visualize_tsne.py
@ -156,9 +156,22 @@ def main():
    dataset.data_source.data_infos = tmp_infos
    logger.info(f'Apply t-SNE to visualize {len(dataset)} samples.')

+    if 'imgs_per_gpu' in cfg.data:
+        logger.warning('"imgs_per_gpu" is deprecated. '
+                       'Please use "samples_per_gpu" instead')
+        if 'samples_per_gpu' in cfg.data:
+            logger.warning(
+                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
+                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
+                f'={cfg.data.imgs_per_gpu} is used in this experiments')
+        else:
+            logger.warning(
+                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
+                f'{cfg.data.imgs_per_gpu} in this experiments')
+        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
    data_loader = build_dataloader(
        dataset,
-        imgs_per_gpu=dataset_cfg.data.imgs_per_gpu,
+        samples_per_gpu=dataset_cfg.data.samples_per_gpu,
        workers_per_gpu=dataset_cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)
--- a/tools/benchmarks/classification/dist_train_linear.sh
+++ b/tools/benchmarks/classification/dist_train_linear.sh
@ -6,7 +6,7 @@ set -x
 CFG=$1  # use cfgs under "configs/benchmarks/classification/imagenet/*.py"
 PRETRAIN=$2  # pretrained model
 PY_ARGS=${@:3}
-GPUS=${GPUS:-8}  # When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
+GPUS=${GPUS:-8}  # When changing GPUS, please also change samples_per_gpu in the config file accordingly to ensure the total batch size is 256.
 PORT=${PORT:-29500}

 # set work_dir according to config path and pretrained model to distinguish different models
--- a/tools/benchmarks/classification/slurm_train_linear.sh
+++ b/tools/benchmarks/classification/slurm_train_linear.sh
@ -8,7 +8,7 @@ JOB_NAME=$2
 CFG=$3  # use cfgs under "configs/benchmarks/classification/imagenet/*.py"
 PRETRAIN=$4  # pretrained model
 PY_ARGS=${@:5}
-GPUS=${GPUS:-8}  # When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
+GPUS=${GPUS:-8}  # When changing GPUS, please also change samples_per_gpu in the config file accordingly to ensure the total batch size is 256.
 GPUS_PER_NODE=${GPUS_PER_NODE:-8}
 CPUS_PER_TASK=${CPUS_PER_TASK:-5}
 PORT=${PORT:-29500}
--- a/tools/benchmarks/classification/svm_voc07/extract.py
+++ b/tools/benchmarks/classification/svm_voc07/extract.py
@ -96,9 +96,23 @@ def main():
    # build the dataloader
    dataset_cfg = mmcv.Config.fromfile(args.dataset_config)
    dataset = build_dataset(dataset_cfg.data.extract)
+    if 'imgs_per_gpu' in cfg.data:
+        logger.warning('"imgs_per_gpu" is deprecated. '
+                       'Please use "samples_per_gpu" instead')
+        if 'samples_per_gpu' in cfg.data:
+            logger.warning(
+                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
+                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
+                f'={cfg.data.imgs_per_gpu} is used in this experiments')
+        else:
+            logger.warning(
+                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
+                f'{cfg.data.imgs_per_gpu} in this experiments')
+        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
+
    data_loader = build_dataloader(
        dataset,
-        imgs_per_gpu=dataset_cfg.data.imgs_per_gpu,
+        samples_per_gpu=dataset_cfg.data.samples_per_gpu,
        workers_per_gpu=dataset_cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)
--- a/tools/test.py
+++ b/tools/test.py
@ -96,9 +96,22 @@ def main():

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
+    if 'imgs_per_gpu' in cfg.data:
+        logger.warning('"imgs_per_gpu" is deprecated. '
+                       'Please use "samples_per_gpu" instead')
+        if 'samples_per_gpu' in cfg.data:
+            logger.warning(
+                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
+                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
+                f'={cfg.data.imgs_per_gpu} is used in this experiments')
+        else:
+            logger.warning(
+                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
+                f'{cfg.data.imgs_per_gpu} in this experiments')
+        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
    data_loader = build_dataloader(
        dataset,
-        imgs_per_gpu=cfg.data.imgs_per_gpu,
+        samples_per_gpu=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)