diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs32.py b/mmpretrain/configs/_base_/datasets/imagenet_bs32.py index 7d074008..63e2d975 100644 --- a/mmpretrain/configs/_base_/datasets/imagenet_bs32.py +++ b/mmpretrain/configs/_base_/datasets/imagenet_bs32.py @@ -38,7 +38,7 @@ train_dataloader = dict( dataset=dict( type=dataset_type, data_root='data/imagenet', - ann_file='meta/train.txt', + # ann_file='meta/train.txt', data_prefix='train', pipeline=train_pipeline), sampler=dict(type=DefaultSampler, shuffle=True), @@ -50,7 +50,7 @@ val_dataloader = dict( dataset=dict( type=dataset_type, data_root='data/imagenet', - ann_file='meta/val.txt', + # ann_file='meta/val.txt', data_prefix='val', pipeline=test_pipeline), sampler=dict(type=DefaultSampler, shuffle=False), diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py b/mmpretrain/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py index 5fc13e72..a62b973b 100644 --- a/mmpretrain/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py +++ b/mmpretrain/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py @@ -33,7 +33,7 @@ train_pipeline = [ scale=224, backend='pillow', interpolation='bicubic'), - dict(type='RandomFlip', prob=0.5, direction='horizontal'), + dict(type=RandomFlip, prob=0.5, direction='horizontal'), dict( type=AutoAugment, policies='imagenet', diff --git a/mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py b/mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py index 20487ce7..4d292253 100644 --- a/mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py +++ b/mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py @@ -40,10 +40,10 @@ model = dict( ])) # dataset settings -train_dataloader = dict(batch_size=128) +train_dataloader.update(batch_size=128) # schedule settings -optim_wrapper = dict( +optim_wrapper.update( optimizer=dict( type=AdamW, lr=1e-4 * 4096 / 256, @@ -64,4 +64,4 @@ 
custom_hooks = [dict(type=EMAHook, momentum=1e-4)] # NOTE: `auto_scale_lr` is for automatically scaling LR # based on the actual training batch size. # base_batch_size = (32 GPUs) x (128 samples per GPU) -auto_scale_lr = dict(base_batch_size=4096) \ No newline at end of file +auto_scale_lr.update(base_batch_size=4096) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py b/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py index 8ee006a1..e5ce582e 100644 --- a/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py +++ b/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py @@ -11,10 +11,10 @@ with read_base(): from .._base_.default_runtime import * # model setting -model = dict( +model.update( head=dict(hidden_dim=3072), train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)), ) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py b/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py index fa060040..ba55b6d1 100644 --- a/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py +++ b/mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k_384px.py @@ -16,10 +16,10 @@ with read_base(): # model setting -model = dict(backbone=dict(img_size=384)) +model.update(backbone=dict(img_size=384)) # dataset setting -data_preprocessor = dict( +data_preprocessor.update( mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], # convert image from BGR to RGB @@ -40,9 +40,9 @@ test_pipeline = [ dict(type=PackInputs), ] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 
+train_dataloader.update(dataset=dict(pipeline=train_pipeline)) +val_dataloader.update(dataset=dict(pipeline=test_pipeline)) +test_dataloader.update(dataset=dict(pipeline=test_pipeline)) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k.py b/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k.py index b935169f..9758bc3c 100644 --- a/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k.py +++ b/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k.py @@ -12,10 +12,10 @@ with read_base(): # model setting -model = dict( +model.update( head=dict(hidden_dim=3072), train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)), ) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py b/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py index 0372369a..e194cec1 100644 --- a/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py +++ b/mmpretrain/configs/vision_transformer/vit_base_p32_64xb64_in1k_384px.py @@ -15,10 +15,10 @@ with read_base(): from .._base_.default_runtime import * # model setting -model = dict(backbone=dict(img_size=384)) +model.update(backbone=dict(img_size=384)) # dataset setting -data_preprocessor = dict( +data_preprocessor.update( mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], # convert image from BGR to RGB @@ -39,9 +39,9 @@ test_pipeline = [ dict(type=PackInputs), ] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 
+train_dataloader.update(dataset=dict(pipeline=train_pipeline)) +val_dataloader.update(dataset=dict(pipeline=test_pipeline)) +test_dataloader.update(dataset=dict(pipeline=test_pipeline)) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py b/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py index bf64f536..11265802 100644 --- a/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py +++ b/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k.py @@ -11,10 +11,10 @@ with read_base(): # model setting -model = dict( +model.update( head=dict(hidden_dim=3072), train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)), ) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py b/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py index e75a9cd4..247e0bb3 100644 --- a/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py +++ b/mmpretrain/configs/vision_transformer/vit_large_p16_64xb64_in1k_384px.py @@ -16,10 +16,10 @@ with read_base(): # model setting -model = dict(backbone=dict(img_size=384)) +model.update(backbone=dict(img_size=384)) # dataset setting -data_preprocessor = dict( +data_preprocessor.update( mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], # convert image from BGR to RGB @@ -40,9 +40,9 @@ test_pipeline = [ dict(type=PackInputs), ] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 
+train_dataloader.update(dataset=dict(pipeline=train_pipeline)) +val_dataloader.update(dataset=dict(pipeline=test_pipeline)) +test_dataloader.update(dataset=dict(pipeline=test_pipeline)) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py b/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py index 5172f030..fa8f530f 100644 --- a/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py +++ b/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k.py @@ -10,10 +10,10 @@ with read_base(): from .._base_.default_runtime import * # model setting -model = dict( +model.update( head=dict(hidden_dim=3072), train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)), ) # schedule setting -optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) \ No newline at end of file +optim_wrapper.update(clip_grad=dict(max_norm=1.0)) \ No newline at end of file diff --git a/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py b/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py index 5b41240e..4819705b 100644 --- a/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py +++ b/mmpretrain/configs/vision_transformer/vit_large_p32_64xb64_in1k_384px.py @@ -13,7 +13,7 @@ with read_base(): from .._base_.default_runtime import * # model setting -model = dict(backbone=dict(img_size=384)) +model.update(backbone=dict(img_size=384)) # dataset setting data_preprocessor = dict( @@ -37,9 +37,9 @@ test_pipeline = [ dict(type=PackInputs), ] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +train_dataloader.update(dataset=dict(pipeline=train_pipeline)) 
+val_dataloader.update(dataset=dict(pipeline=test_pipeline))
+test_dataloader.update(dataset=dict(pipeline=test_pipeline))
 
 # schedule setting
-optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
\ No newline at end of file
+optim_wrapper.update(clip_grad=dict(max_norm=1.0))
\ No newline at end of file
diff --git a/tools/train.py b/tools/train.py
index fce6a3c6..046e9d66 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -146,8 +146,6 @@ def main():
 
     # load config
     cfg = Config.fromfile(args.config)
-    cfg.train_dataloader.dataset.data_root = 'xyz'
-    cfg.val_dataloader.dataset.data_root = 'xyz'
 
     # merge cli arguments to config
     cfg = merge_args(cfg, args)