mirror of
https://github.com/PaddlePaddle/PaddleClas.git
synced 2025-06-03 21:55:06 +08:00
update: update the default gpu num to 8 when using AdamW
This commit is contained in:
parent
fea9522a69
commit
24372cc6e5
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1.25e-4
|
learning_rate: 2.5e-4
|
||||||
eta_min: 1.25e-6
|
eta_min: 2.5e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1.25e-7
|
warmup_start_lr: 2.5e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 6.25e-5
|
learning_rate: 1.25e-4
|
||||||
eta_min: 6.25e-7
|
eta_min: 1.25e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 6.25e-8
|
warmup_start_lr: 1.25e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1.25e-4
|
learning_rate: 2.5e-4
|
||||||
eta_min: 1.25e-6
|
eta_min: 2.5e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1.25e-7
|
warmup_start_lr: 2.5e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 3.125e-5
|
learning_rate: 6.25e-5
|
||||||
eta_min: 3.125e-7
|
eta_min: 6.25e-7
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 3.125e-8
|
warmup_start_lr: 6.25e-8
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 2.5e-4
|
learning_rate: 5e-4
|
||||||
eta_min: 2.5e-6
|
eta_min: 5e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 2.5e-7
|
warmup_start_lr: 5e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -41,10 +41,10 @@ Optimizer:
|
|||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -44,11 +44,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -44,11 +44,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -44,11 +44,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -250,12 +250,17 @@ class Engine(object):
|
|||||||
level=amp_level,
|
level=amp_level,
|
||||||
save_dtype='float32')
|
save_dtype='float32')
|
||||||
|
|
||||||
# for distributed
|
# check the gpu num
|
||||||
world_size = dist.get_world_size()
|
world_size = dist.get_world_size()
|
||||||
self.config["Global"]["distributed"] = world_size != 1
|
self.config["Global"]["distributed"] = world_size != 1
|
||||||
if world_size != 4 and self.mode == "train":
|
if self.mode == "train":
|
||||||
msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train."
|
std_gpu_num = 8 if self.config["Optimizer"][
|
||||||
|
"name"] == "AdamW" else 4
|
||||||
|
if world_size != std_gpu_num:
|
||||||
|
msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train."
|
||||||
logger.warning(msg)
|
logger.warning(msg)
|
||||||
|
|
||||||
|
# for distributed
|
||||||
if self.config["Global"]["distributed"]:
|
if self.config["Global"]["distributed"]:
|
||||||
dist.init_parallel_env()
|
dist.init_parallel_env()
|
||||||
self.model = paddle.DataParallel(self.model)
|
self.model = paddle.DataParallel(self.model)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user