diff --git a/configs/benchmarks/linear_classification/imagenet/r50_last.py b/configs/benchmarks/linear_classification/imagenet/r50_last.py index 625cdefb..1d476619 100644 --- a/configs/benchmarks/linear_classification/imagenet/r50_last.py +++ b/configs/benchmarks/linear_classification/imagenet/r50_last.py @@ -28,15 +28,16 @@ img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_pipeline = [ dict(type='RandomResizedCrop', size=224), dict(type='RandomHorizontalFlip'), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] test_pipeline = [ dict(type='Resize', size=256), dict(type='CenterCrop', size=224), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] +# prefetch +prefetch = False +if not prefetch: + train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) + test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) data = dict( imgs_per_gpu=32, # total 32*8=256, 8GPU linear cls workers_per_gpu=5, @@ -45,12 +46,14 @@ data = dict( data_source=dict( list_file=data_train_list, root=data_train_root, **data_source_cfg), - pipeline=train_pipeline), + pipeline=train_pipeline, + prefetch=prefetch), val=dict( type=dataset_type, data_source=dict( list_file=data_test_list, root=data_test_root, **data_source_cfg), - pipeline=test_pipeline)) + pipeline=test_pipeline, + prefetch=prefetch)) # additional hooks custom_hooks = [ dict( @@ -60,6 +63,8 @@ custom_hooks = [ interval=1, imgs_per_gpu=128, workers_per_gpu=4, + prefetch=prefetch, + img_norm_cfg=img_norm_cfg, eval_param=dict(topk=(1, 5))) ] # optimizer diff --git a/configs/benchmarks/linear_classification/imagenet/r50_last_sobel.py b/configs/benchmarks/linear_classification/imagenet/r50_last_sobel.py index b7d24027..ec4f7723 100644 --- a/configs/benchmarks/linear_classification/imagenet/r50_last_sobel.py +++ b/configs/benchmarks/linear_classification/imagenet/r50_last_sobel.py @@ -28,15 +28,16 @@ img_norm_cfg = 
dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_pipeline = [ dict(type='RandomResizedCrop', size=224), dict(type='RandomHorizontalFlip'), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] test_pipeline = [ dict(type='Resize', size=256), dict(type='CenterCrop', size=224), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] +# prefetch +prefetch = False +if not prefetch: + train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) + test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) data = dict( imgs_per_gpu=32, # total 32*8=256, 8GPU linear cls workers_per_gpu=5, @@ -45,12 +46,14 @@ data = dict( data_source=dict( list_file=data_train_list, root=data_train_root, **data_source_cfg), - pipeline=train_pipeline), + pipeline=train_pipeline, + prefetch=prefetch), val=dict( type=dataset_type, data_source=dict( list_file=data_test_list, root=data_test_root, **data_source_cfg), - pipeline=test_pipeline)) + pipeline=test_pipeline, + prefetch=prefetch)) # additional hooks custom_hooks = [ dict( @@ -60,6 +63,8 @@ custom_hooks = [ interval=1, imgs_per_gpu=128, workers_per_gpu=4, + prefetch=prefetch, + img_norm_cfg=img_norm_cfg, eval_param=dict(topk=(1, 5))) ] # optimizer diff --git a/configs/benchmarks/linear_classification/places205/r50_multihead.py b/configs/benchmarks/linear_classification/places205/r50_multihead.py index 8a61eb6e..8826bb41 100644 --- a/configs/benchmarks/linear_classification/places205/r50_multihead.py +++ b/configs/benchmarks/linear_classification/places205/r50_multihead.py @@ -35,15 +35,16 @@ train_pipeline = [ dict(type='CenterCrop', size=256), dict(type='RandomCrop', size=224), dict(type='RandomHorizontalFlip'), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] test_pipeline = [ dict(type='Resize', size=256), dict(type='CenterCrop', size=224), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] +# 
prefetch +prefetch = False +if not prefetch: + train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) + test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) data = dict( imgs_per_gpu=32, # total 32x8=256 workers_per_gpu=4, @@ -52,12 +53,14 @@ data = dict( data_source=dict( list_file=data_train_list, root=data_train_root, **data_source_cfg), - pipeline=train_pipeline), + pipeline=train_pipeline, + prefetch=prefetch), val=dict( type=dataset_type, data_source=dict( list_file=data_test_list, root=data_test_root, **data_source_cfg), - pipeline=test_pipeline)) + pipeline=test_pipeline, + prefetch=prefetch)) # additional hooks custom_hooks = [ dict( @@ -67,6 +70,8 @@ custom_hooks = [ interval=10, imgs_per_gpu=32, workers_per_gpu=4, + prefetch=prefetch, + img_norm_cfg=img_norm_cfg, eval_param=dict(topk=(1, ))) ] # optimizer diff --git a/configs/benchmarks/linear_classification/places205/r50_multihead_sobel.py b/configs/benchmarks/linear_classification/places205/r50_multihead_sobel.py index 9dea3f3d..7e5cd869 100644 --- a/configs/benchmarks/linear_classification/places205/r50_multihead_sobel.py +++ b/configs/benchmarks/linear_classification/places205/r50_multihead_sobel.py @@ -35,15 +35,16 @@ train_pipeline = [ dict(type='CenterCrop', size=256), dict(type='RandomCrop', size=224), dict(type='RandomHorizontalFlip'), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] test_pipeline = [ dict(type='Resize', size=256), dict(type='CenterCrop', size=224), - dict(type='ToTensor'), - dict(type='Normalize', **img_norm_cfg), ] +# prefetch +prefetch = False +if not prefetch: + train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) + test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)]) data = dict( imgs_per_gpu=32, # total 32x8=256 workers_per_gpu=4, @@ -52,12 +53,14 @@ data = dict( data_source=dict( list_file=data_train_list, 
root=data_train_root, **data_source_cfg), - pipeline=train_pipeline), + pipeline=train_pipeline, + prefetch=prefetch), val=dict( type=dataset_type, data_source=dict( list_file=data_test_list, root=data_test_root, **data_source_cfg), - pipeline=test_pipeline)) + pipeline=test_pipeline, + prefetch=prefetch)) # additional hooks custom_hooks = [ dict( @@ -67,6 +70,8 @@ custom_hooks = [ interval=10, imgs_per_gpu=32, workers_per_gpu=4, + prefetch=prefetch, + img_norm_cfg=img_norm_cfg, eval_param=dict(topk=(1, ))) ] # optimizer diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 4efe859d..bb3a56fe 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -92,7 +92,7 @@ prefetch = True 3 . Replacing Pillow with Pillow-SIMD (https://github.com/uploadcare/pillow-simd.git) to make use of SIMD command sets with modern CPU. ```shell pip uninstall pillow -pip install Pillow-SIMD +pip install Pillow-SIMD  # or, if AVX2 is available: CC="cc -mavx2" pip install -U --force-reinstall pillow-simd ``` We test it using MoCoV2 using a total batch size of 256 on Tesla V100. The training time per step is decreased to 0.17s from 0.23s. ## Benchmarks diff --git a/openselfsup/datasets/classification.py b/openselfsup/datasets/classification.py index bfb5174a..cc4094ae 100644 --- a/openselfsup/datasets/classification.py +++ b/openselfsup/datasets/classification.py @@ -4,6 +4,7 @@ from openselfsup.utils import print_log from .registry import DATASETS from .base import BaseDataset +from .utils import to_numpy @DATASETS.register_module @@ -11,12 +12,14 @@ class ClassificationDataset(BaseDataset): """Dataset for classification. 
""" - def __init__(self, data_source, pipeline): - super(ClassificationDataset, self).__init__(data_source, pipeline) + def __init__(self, data_source, pipeline, prefetch=False): + super(ClassificationDataset, self).__init__(data_source, pipeline, prefetch) def __getitem__(self, idx): img, target = self.data_source.get_sample(idx) img = self.pipeline(img) + if self.prefetch: + img = torch.from_numpy(to_numpy(img)) return dict(img=img, gt_label=target) def evaluate(self, scores, keyword, logger=None, topk=(1, 5)): diff --git a/openselfsup/hooks/validate_hook.py b/openselfsup/hooks/validate_hook.py index 1dabcaa6..b769183a 100644 --- a/openselfsup/hooks/validate_hook.py +++ b/openselfsup/hooks/validate_hook.py @@ -42,7 +42,10 @@ class ValidateHook(Hook): eval_kwargs['imgs_per_gpu'], eval_kwargs['workers_per_gpu'], dist=dist_mode, - shuffle=False) + shuffle=False, + prefetch=eval_kwargs.get('prefetch', False), + img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()), + ) self.dist_mode = dist_mode self.initial = initial self.interval = interval