mmpretrain/configs/_base_/datasets/imagenet_bs32_mocov2.py

# dataset settings
dataset_type = 'ImageNet'
data_root = 'data/imagenet/'
# Normalize with the standard ImageNet mean/std (RGB order).
data_preprocessor = dict(
    type='SelfSupDataPreprocessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# The difference between MoCo v2 and MoCo v1 lies in the transforms of the view pipeline.
view_pipeline = [
    dict(
        type='RandomResizedCrop',
        scale=224,
        crop_ratio_range=(0.2, 1.),
        backend='pillow'),
    dict(
        type='RandomApply',
        transforms=[
            dict(
                type='ColorJitter',
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
                hue=0.1)
        ],
        prob=0.8),
    dict(
        type='RandomGrayscale',
        prob=0.2,
        keep_channels=True,
        channel_weights=(0.114, 0.587, 0.2989)),
    dict(
        type='GaussianBlur',
        magnitude_range=(0.1, 2.0),
        magnitude_std='inf',
        prob=0.5),
    dict(type='RandomFlip', prob=0.5),
]
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # Apply the view pipeline twice to produce two augmented views per image.
    dict(type='MultiView', num_views=2, transforms=[view_pipeline]),
    dict(type='PackInputs')
]
train_dataloader = dict(
    batch_size=32,
    num_workers=8,
    drop_last=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    collate_fn=dict(type='default_collate'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        split='train',
        pipeline=train_pipeline))
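
Usage sketch (not part of this file): a downstream MoCo v2 experiment config would normally inherit this base dataset config through MMEngine's config inheritance and override only the fields it changes. The sketch below assumes the pure-Python config style (mmengine's read_base) used for configs under mmpretrain/configs; the downstream file name and the overridden batch size are illustrative, not taken from the repository.

# Hypothetical downstream config, e.g. mmpretrain/configs/mocov2/my_experiment.py
from mmengine.config import read_base

with read_base():
    # Pull every variable defined in this base file into the current config
    # namespace (data_preprocessor, view_pipeline, train_pipeline, train_dataloader, ...).
    from .._base_.datasets.imagenet_bs32_mocov2 import *

# Override only what differs from the base, e.g. a larger per-device batch size.
train_dataloader.update(batch_size=64)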