mirror of
https://github.com/PaddlePaddle/PaddleClas.git
synced 2025-06-03 21:55:06 +08:00
Merge branch 'develop' into ConvNeXt
This commit is contained in:
commit
b334da6fad
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1.25e-4
|
learning_rate: 2.5e-4
|
||||||
eta_min: 1.25e-6
|
eta_min: 2.5e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1.25e-7
|
warmup_start_lr: 2.5e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 6.25e-5
|
learning_rate: 1.25e-4
|
||||||
eta_min: 6.25e-7
|
eta_min: 1.25e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 6.25e-8
|
warmup_start_lr: 1.25e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1.25e-4
|
learning_rate: 2.5e-4
|
||||||
eta_min: 1.25e-6
|
eta_min: 2.5e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1.25e-7
|
warmup_start_lr: 2.5e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 3.125e-5
|
learning_rate: 6.25e-5
|
||||||
eta_min: 3.125e-7
|
eta_min: 6.25e-7
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 3.125e-8
|
warmup_start_lr: 6.25e-8
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 2.5e-4
|
learning_rate: 5e-4
|
||||||
eta_min: 2.5e-6
|
eta_min: 5e-6
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 2.5e-7
|
warmup_start_lr: 5e-7
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -42,11 +42,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed cls_token .bias norm
|
no_weight_decay_name: pos_embed cls_token .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -41,10 +41,10 @@ Optimizer:
|
|||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -40,11 +40,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token pos_embed dist_token
|
no_weight_decay_name: norm cls_token pos_embed dist_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 1e-3
|
learning_rate: 2e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
DataLoader:
|
DataLoader:
|
||||||
|
@ -49,9 +49,8 @@ Loss:
|
|||||||
model_name_pairs:
|
model_name_pairs:
|
||||||
- ["Student", "Teacher"]
|
- ["Student", "Teacher"]
|
||||||
Eval:
|
Eval:
|
||||||
- DistillationGTCELoss:
|
- CELoss:
|
||||||
weight: 1.0
|
weight: 1.0
|
||||||
model_names: ["Student"]
|
|
||||||
|
|
||||||
|
|
||||||
Optimizer:
|
Optimizer:
|
||||||
|
@ -88,10 +88,8 @@ Loss:
|
|||||||
s_shapes: *s_shapes
|
s_shapes: *s_shapes
|
||||||
t_shapes: *t_shapes
|
t_shapes: *t_shapes
|
||||||
Eval:
|
Eval:
|
||||||
- DistillationGTCELoss:
|
- CELoss:
|
||||||
weight: 1.0
|
weight: 1.0
|
||||||
model_names: ["Student"]
|
|
||||||
|
|
||||||
|
|
||||||
Optimizer:
|
Optimizer:
|
||||||
name: Momentum
|
name: Momentum
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -44,11 +44,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -44,11 +44,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -44,11 +44,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 5e-6
|
eta_min: 1e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 5e-7
|
warmup_start_lr: 1e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -41,11 +41,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 20
|
warmup_epoch: 20
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -43,11 +43,12 @@ Optimizer:
|
|||||||
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
|
||||||
one_dim_param_no_weight_decay: True
|
one_dim_param_no_weight_decay: True
|
||||||
lr:
|
lr:
|
||||||
|
# for 8 cards
|
||||||
name: Cosine
|
name: Cosine
|
||||||
learning_rate: 5e-4
|
learning_rate: 1e-3
|
||||||
eta_min: 1e-5
|
eta_min: 2e-5
|
||||||
warmup_epoch: 5
|
warmup_epoch: 5
|
||||||
warmup_start_lr: 1e-6
|
warmup_start_lr: 2e-6
|
||||||
|
|
||||||
|
|
||||||
# data loader for train and eval
|
# data loader for train and eval
|
||||||
|
@ -262,12 +262,17 @@ class Engine(object):
|
|||||||
self.model_ema = ExponentialMovingAverage(
|
self.model_ema = ExponentialMovingAverage(
|
||||||
self.model, self.config['EMA'].get("decay", 0.9999))
|
self.model, self.config['EMA'].get("decay", 0.9999))
|
||||||
|
|
||||||
# for distributed
|
# check the gpu num
|
||||||
world_size = dist.get_world_size()
|
world_size = dist.get_world_size()
|
||||||
self.config["Global"]["distributed"] = world_size != 1
|
self.config["Global"]["distributed"] = world_size != 1
|
||||||
if world_size != 4 and self.mode == "train":
|
if self.mode == "train":
|
||||||
msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train."
|
std_gpu_num = 8 if self.config["Optimizer"][
|
||||||
logger.warning(msg)
|
"name"] == "AdamW" else 4
|
||||||
|
if world_size != std_gpu_num:
|
||||||
|
msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train."
|
||||||
|
logger.warning(msg)
|
||||||
|
|
||||||
|
# for distributed
|
||||||
if self.config["Global"]["distributed"]:
|
if self.config["Global"]["distributed"]:
|
||||||
dist.init_parallel_env()
|
dist.init_parallel_env()
|
||||||
self.model = paddle.DataParallel(self.model)
|
self.model = paddle.DataParallel(self.model)
|
||||||
|
@ -80,22 +80,17 @@ def classification_eval(engine, epoch_id=0):
|
|||||||
current_samples = batch_size * paddle.distributed.get_world_size()
|
current_samples = batch_size * paddle.distributed.get_world_size()
|
||||||
accum_samples += current_samples
|
accum_samples += current_samples
|
||||||
|
|
||||||
|
if isinstance(out, dict) and "Student" in out:
|
||||||
|
out = out["Student"]
|
||||||
|
if isinstance(out, dict) and "logits" in out:
|
||||||
|
out = out["logits"]
|
||||||
|
|
||||||
# gather Tensor when distributed
|
# gather Tensor when distributed
|
||||||
if paddle.distributed.get_world_size() > 1:
|
if paddle.distributed.get_world_size() > 1:
|
||||||
label_list = []
|
label_list = []
|
||||||
paddle.distributed.all_gather(label_list, batch[1])
|
paddle.distributed.all_gather(label_list, batch[1])
|
||||||
labels = paddle.concat(label_list, 0)
|
labels = paddle.concat(label_list, 0)
|
||||||
|
|
||||||
if isinstance(out, dict):
|
|
||||||
if "Student" in out:
|
|
||||||
out = out["Student"]
|
|
||||||
if isinstance(out, dict):
|
|
||||||
out = out["logits"]
|
|
||||||
elif "logits" in out:
|
|
||||||
out = out["logits"]
|
|
||||||
else:
|
|
||||||
msg = "Error: Wrong key in out!"
|
|
||||||
raise Exception(msg)
|
|
||||||
if isinstance(out, list):
|
if isinstance(out, list):
|
||||||
preds = []
|
preds = []
|
||||||
for x in out:
|
for x in out:
|
||||||
|
@ -20,6 +20,7 @@ class DSHSDLoss(nn.Layer):
|
|||||||
"""
|
"""
|
||||||
# DSHSD(IEEE ACCESS 2019)
|
# DSHSD(IEEE ACCESS 2019)
|
||||||
# paper [Deep Supervised Hashing Based on Stable Distribution](https://ieeexplore.ieee.org/document/8648432/)
|
# paper [Deep Supervised Hashing Based on Stable Distribution](https://ieeexplore.ieee.org/document/8648432/)
|
||||||
|
# code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/DSHSD.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, alpha, multi_label=False):
|
def __init__(self, alpha, multi_label=False):
|
||||||
@ -62,6 +63,7 @@ class DSHSDLoss(nn.Layer):
|
|||||||
class LCDSHLoss(nn.Layer):
|
class LCDSHLoss(nn.Layer):
|
||||||
"""
|
"""
|
||||||
# paper [Locality-Constrained Deep Supervised Hashing for Image Retrieval](https://www.ijcai.org/Proceedings/2017/0499.pdf)
|
# paper [Locality-Constrained Deep Supervised Hashing for Image Retrieval](https://www.ijcai.org/Proceedings/2017/0499.pdf)
|
||||||
|
# code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/LCDSH.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, n_class, _lambda):
|
def __init__(self, n_class, _lambda):
|
||||||
@ -100,6 +102,7 @@ class DCHLoss(paddle.nn.Layer):
|
|||||||
"""
|
"""
|
||||||
# paper [Deep Cauchy Hashing for Hamming Space Retrieval]
|
# paper [Deep Cauchy Hashing for Hamming Space Retrieval]
|
||||||
URL:(http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf)
|
URL:(http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf)
|
||||||
|
# code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/DCH.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, gamma, _lambda, n_class):
|
def __init__(self, gamma, _lambda, n_class):
|
||||||
|
@ -23,6 +23,11 @@ from .comfunc import rerange_index
|
|||||||
|
|
||||||
|
|
||||||
class EmlLoss(paddle.nn.Layer):
|
class EmlLoss(paddle.nn.Layer):
|
||||||
|
"""Ensemble Metric Learning Loss
|
||||||
|
paper: [Large Scale Strongly Supervised Ensemble Metric Learning, with Applications to Face Verification and Retrieval](https://arxiv.org/pdf/1212.6094.pdf)
|
||||||
|
code reference: https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/metric_learning/losses/emlloss.py
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, batch_size=40, samples_each_class=2):
|
def __init__(self, batch_size=40, samples_each_class=2):
|
||||||
super(EmlLoss, self).__init__()
|
super(EmlLoss, self).__init__()
|
||||||
assert (batch_size % samples_each_class == 0)
|
assert (batch_size % samples_each_class == 0)
|
||||||
|
@ -18,11 +18,13 @@ import paddle.nn.functional as F
|
|||||||
class GoogLeNetLoss(nn.Layer):
|
class GoogLeNetLoss(nn.Layer):
|
||||||
"""
|
"""
|
||||||
Cross entropy loss used after googlenet
|
Cross entropy loss used after googlenet
|
||||||
|
reference paper: [Going Deeper with Convolutions](https://arxiv.org/pdf/1409.4842v1.pdf)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, epsilon=None):
|
def __init__(self, epsilon=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "googlenet is not support label_smooth"
|
assert (epsilon is None or epsilon <= 0 or
|
||||||
|
epsilon >= 1), "googlenet is not support label_smooth"
|
||||||
|
|
||||||
def forward(self, inputs, label):
|
def forward(self, inputs, label):
|
||||||
input0, input1, input2 = inputs
|
input0, input1, input2 = inputs
|
||||||
|
@ -21,7 +21,9 @@ from .comfunc import rerange_index
|
|||||||
|
|
||||||
class MSMLoss(paddle.nn.Layer):
|
class MSMLoss(paddle.nn.Layer):
|
||||||
"""
|
"""
|
||||||
MSMLoss Loss, based on triplet loss. USE P * K samples.
|
paper : [Margin Sample Mining Loss: A Deep Learning Based Method for Person Re-identification](https://arxiv.org/pdf/1710.00478.pdf)
|
||||||
|
code reference: https://github.com/michuanhaohao/keras_reid/blob/master/reid_tripletcls.py
|
||||||
|
Margin Sample Mining Loss, based on triplet loss. USE P * K samples.
|
||||||
the batch size is fixed. Batch_size = P * K; but the K may vary between batches.
|
the batch size is fixed. Batch_size = P * K; but the K may vary between batches.
|
||||||
same label gather together
|
same label gather together
|
||||||
|
|
||||||
|
@ -5,6 +5,11 @@ import paddle
|
|||||||
|
|
||||||
|
|
||||||
class NpairsLoss(paddle.nn.Layer):
|
class NpairsLoss(paddle.nn.Layer):
|
||||||
|
"""Npair_loss_
|
||||||
|
paper [Improved deep metric learning with multi-class N-pair loss objective](https://dl.acm.org/doi/10.5555/3157096.3157304)
|
||||||
|
code reference: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib/losses/metric_learning/npairs_loss
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, reg_lambda=0.01):
|
def __init__(self, reg_lambda=0.01):
|
||||||
super(NpairsLoss, self).__init__()
|
super(NpairsLoss, self).__init__()
|
||||||
self.reg_lambda = reg_lambda
|
self.reg_lambda = reg_lambda
|
||||||
|
@ -23,6 +23,11 @@ import paddle.nn.functional as F
|
|||||||
|
|
||||||
|
|
||||||
class PairwiseCosface(nn.Layer):
|
class PairwiseCosface(nn.Layer):
|
||||||
|
"""
|
||||||
|
paper: Circle Loss: A Unified Perspective of Pair Similarity Optimization
|
||||||
|
code reference: https://github.com/leoluopy/circle-loss-demonstration/blob/main/circle_loss.py
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, margin, gamma):
|
def __init__(self, margin, gamma):
|
||||||
super(PairwiseCosface, self).__init__()
|
super(PairwiseCosface, self).__init__()
|
||||||
self.margin = margin
|
self.margin = margin
|
||||||
@ -36,8 +41,10 @@ class PairwiseCosface(nn.Layer):
|
|||||||
dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)
|
dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)
|
||||||
|
|
||||||
N = dist_mat.shape[0]
|
N = dist_mat.shape[0]
|
||||||
is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
|
is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
|
||||||
is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
|
paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
|
||||||
|
is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
|
||||||
|
paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
|
||||||
|
|
||||||
# Mask scores related to itself
|
# Mask scores related to itself
|
||||||
is_pos = is_pos - paddle.eye(N, N)
|
is_pos = is_pos - paddle.eye(N, N)
|
||||||
@ -46,10 +53,12 @@ class PairwiseCosface(nn.Layer):
|
|||||||
s_n = dist_mat * is_neg
|
s_n = dist_mat * is_neg
|
||||||
|
|
||||||
logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
|
logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
|
||||||
logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)
|
logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg
|
||||||
|
)
|
||||||
|
|
||||||
loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean()
|
loss = F.softplus(
|
||||||
|
paddle.logsumexp(
|
||||||
|
logit_p, axis=1) + paddle.logsumexp(
|
||||||
|
logit_n, axis=1)).mean()
|
||||||
|
|
||||||
return {"PairwiseCosface": loss}
|
return {"PairwiseCosface": loss}
|
||||||
|
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@ def pdist(e, squared=False, eps=1e-12):
|
|||||||
|
|
||||||
|
|
||||||
class RKdAngle(nn.Layer):
|
class RKdAngle(nn.Layer):
|
||||||
|
# paper : [Relational Knowledge Distillation](https://arxiv.org/abs/1904.05068?context=cs.LG)
|
||||||
# reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py
|
# reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py
|
||||||
def __init__(self, target_size=None):
|
def __init__(self, target_size=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@ -64,6 +65,7 @@ class RKdAngle(nn.Layer):
|
|||||||
|
|
||||||
|
|
||||||
class RkdDistance(nn.Layer):
|
class RkdDistance(nn.Layer):
|
||||||
|
# paper : [Relational Knowledge Distillation](https://arxiv.org/abs/1904.05068?context=cs.LG)
|
||||||
# reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py
|
# reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py
|
||||||
def __init__(self, eps=1e-12, target_size=1):
|
def __init__(self, eps=1e-12, target_size=1):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
@ -4,6 +4,7 @@ from paddle import nn
|
|||||||
|
|
||||||
class SupConLoss(nn.Layer):
|
class SupConLoss(nn.Layer):
|
||||||
"""Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
|
"""Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
|
||||||
|
code reference: https://github.com/HobbitLong/SupContrast/blob/master/losses.py
|
||||||
It also supports the unsupervised contrastive loss in SimCLR"""
|
It also supports the unsupervised contrastive loss in SimCLR"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
|
@ -22,6 +22,8 @@ from .comfunc import rerange_index
|
|||||||
|
|
||||||
class TriHardLoss(paddle.nn.Layer):
|
class TriHardLoss(paddle.nn.Layer):
|
||||||
"""
|
"""
|
||||||
|
paper: In Defense of the Triplet Loss for Person Re-Identification
|
||||||
|
code reference: https://github.com/VisualComputingInstitute/triplet-reid/blob/master/loss.py
|
||||||
TriHard Loss, based on triplet loss. USE P * K samples.
|
TriHard Loss, based on triplet loss. USE P * K samples.
|
||||||
the batch size is fixed. Batch_size = P * K; but the K may vary between batches.
|
the batch size is fixed. Batch_size = P * K; but the K may vary between batches.
|
||||||
same label gather together
|
same label gather together
|
||||||
|
@ -1,3 +1,17 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
@ -8,6 +22,8 @@ import paddle.nn as nn
|
|||||||
|
|
||||||
class TripletLossV2(nn.Layer):
|
class TripletLossV2(nn.Layer):
|
||||||
"""Triplet loss with hard positive/negative mining.
|
"""Triplet loss with hard positive/negative mining.
|
||||||
|
paper : [Facenet: A unified embedding for face recognition and clustering](https://arxiv.org/pdf/1503.03832.pdf)
|
||||||
|
code reference: https://github.com/okzhili/Cartoon-face-recognition/blob/master/loss/triplet_loss.py
|
||||||
Args:
|
Args:
|
||||||
margin (float): margin for triplet.
|
margin (float): margin for triplet.
|
||||||
"""
|
"""
|
||||||
|
@ -118,8 +118,6 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None):
|
|||||||
if hasattr(model_list[i], optim_scope):
|
if hasattr(model_list[i], optim_scope):
|
||||||
optim_model.append(getattr(model_list[i], optim_scope))
|
optim_model.append(getattr(model_list[i], optim_scope))
|
||||||
|
|
||||||
assert len(optim_model) == 1, \
|
|
||||||
"Invalid optim model for optim scope({}), number of optim_model={}".format(optim_scope, len(optim_model))
|
|
||||||
optim = getattr(optimizer, optim_name)(
|
optim = getattr(optimizer, optim_name)(
|
||||||
learning_rate=lr, grad_clip=grad_clip,
|
learning_rate=lr, grad_clip=grad_clip,
|
||||||
**optim_cfg)(model_list=optim_model)
|
**optim_cfg)(model_list=optim_model)
|
||||||
|
@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/ILSVRC2012/val
|
|||||||
null:null
|
null:null
|
||||||
##
|
##
|
||||||
trainer:norm_train
|
trainer:norm_train
|
||||||
norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
|
norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1
|
||||||
pact_train:null
|
pact_train:null
|
||||||
fpgm_train:null
|
fpgm_train:null
|
||||||
distill_train:null
|
distill_train:null
|
||||||
|
@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/ILSVRC2012/val
|
|||||||
null:null
|
null:null
|
||||||
##
|
##
|
||||||
trainer:norm_train
|
trainer:norm_train
|
||||||
norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
|
norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1
|
||||||
pact_train:null
|
pact_train:null
|
||||||
fpgm_train:null
|
fpgm_train:null
|
||||||
distill_train:null
|
distill_train:null
|
||||||
|
@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/ILSVRC2012/val
|
|||||||
null:null
|
null:null
|
||||||
##
|
##
|
||||||
trainer:norm_train
|
trainer:norm_train
|
||||||
norm_train:tools/train.py -c ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
|
norm_train:tools/train.py -c ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1
|
||||||
pact_train:null
|
pact_train:null
|
||||||
fpgm_train:null
|
fpgm_train:null
|
||||||
distill_train:null
|
distill_train:null
|
||||||
|
Loading…
x
Reference in New Issue
Block a user