mirror of
https://github.com/PaddlePaddle/PaddleClas.git
synced 2025-06-03 21:55:06 +08:00
update: update the default gpu num to 8 when using AdamW
This commit is contained in:
parent
fea9522a69
commit
24372cc6e5
@ -42,11 +42,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1.25e-4
|
||||
eta_min: 1.25e-6
|
||||
learning_rate: 2.5e-4
|
||||
eta_min: 2.5e-6
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1.25e-7
|
||||
warmup_start_lr: 2.5e-7
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -42,11 +42,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 6.25e-5
|
||||
eta_min: 6.25e-7
|
||||
learning_rate: 1.25e-4
|
||||
eta_min: 1.25e-6
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 6.25e-8
|
||||
warmup_start_lr: 1.25e-7
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -42,11 +42,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1.25e-4
|
||||
eta_min: 1.25e-6
|
||||
learning_rate: 2.5e-4
|
||||
eta_min: 2.5e-6
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1.25e-7
|
||||
warmup_start_lr: 2.5e-7
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -42,11 +42,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 3.125e-5
|
||||
eta_min: 3.125e-7
|
||||
learning_rate: 6.25e-5
|
||||
eta_min: 6.25e-7
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 3.125e-8
|
||||
warmup_start_lr: 6.25e-8
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -42,11 +42,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 2.5e-4
|
||||
eta_min: 2.5e-6
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 2.5e-7
|
||||
warmup_start_lr: 5e-7
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -42,11 +42,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -41,10 +41,10 @@ Optimizer:
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -40,11 +40,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
learning_rate: 2e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -44,11 +44,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -44,11 +44,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -44,11 +44,12 @@ Optimizer:
|
||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 5e-6
|
||||
learning_rate: 1e-3
|
||||
eta_min: 1e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 5e-7
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -41,11 +41,12 @@ Optimizer:
|
||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -41,11 +41,12 @@ Optimizer:
|
||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -41,11 +41,12 @@ Optimizer:
|
||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -41,11 +41,12 @@ Optimizer:
|
||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -41,11 +41,12 @@ Optimizer:
|
||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -41,11 +41,12 @@ Optimizer:
|
||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 20
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -43,11 +43,12 @@ Optimizer:
|
||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||
one_dim_param_no_weight_decay: True
|
||||
lr:
|
||||
# for 8 cards
|
||||
name: Cosine
|
||||
learning_rate: 5e-4
|
||||
eta_min: 1e-5
|
||||
learning_rate: 1e-3
|
||||
eta_min: 2e-5
|
||||
warmup_epoch: 5
|
||||
warmup_start_lr: 1e-6
|
||||
warmup_start_lr: 2e-6
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
|
@ -250,12 +250,17 @@ class Engine(object):
|
||||
level=amp_level,
|
||||
save_dtype='float32')
|
||||
|
||||
# for distributed
|
||||
# check the gpu num
|
||||
world_size = dist.get_world_size()
|
||||
self.config["Global"]["distributed"] = world_size != 1
|
||||
if world_size != 4 and self.mode == "train":
|
||||
msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train."
|
||||
logger.warning(msg)
|
||||
if self.mode == "train":
|
||||
std_gpu_num = 8 if self.config["Optimizer"][
|
||||
"name"] == "AdamW" else 4
|
||||
if world_size != std_gpu_num:
|
||||
msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train."
|
||||
logger.warning(msg)
|
||||
|
||||
# for distributed
|
||||
if self.config["Global"]["distributed"]:
|
||||
dist.init_parallel_env()
|
||||
self.model = paddle.DataParallel(self.model)
|
||||
|
Loading…
x
Reference in New Issue
Block a user