158 lines
3.4 KiB
YAML
158 lines
3.4 KiB
YAML
|
# global configs
|
||
|
Global:
  # Run-wide settings shared by training, evaluation and export.
  # Both restore points are unset, so nothing is loaded through these two keys.
  checkpoints: null
  pretrained_model: null
  output_dir: ./output_r50_vd_distill
  device: gpu
  # NOTE(review): the two intervals are presumably counted in epochs - confirm.
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 100
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  to_static: True
|
||
|
|
||
|
AMP:
  # Mixed-precision settings: starting loss scale plus dynamic rescaling.
  scale_loss: 128.0
  use_dynamic_loss_scaling: True
  # O1: mixed fp16
  level: O1
|
||
|
|
||
|
# model architecture
|
||
|
Arch:
  # Distillation wrapper holding one entry per sub-model listed under `models`.
  name: "DistillationModel"
  # Anchored once here; both sub-models reuse it through *class_num.
  class_num: &class_num 1000
  # if not null, its lengths should be same as models
  pretrained_list: null
  # if not null, its lengths should be same as models
  # Entries follow the order of `models`: Teacher first, then Student.
  freeze_params_list:
    - True
    - False
  infer_model_name: "Student"
  models:
    - Teacher:
        name: ResNet50_vd
        class_num: *class_num
        pretrained: True
        use_ssld: True
    - Student:
        name: PPLCNet_x2_5
        class_num: *class_num
        pretrained: False
|
||
|
|
||
|
# loss function config for training/eval process
|
||
|
Loss:
  # One list of weighted loss terms per phase.
  Train:
    - DistillationDMLLoss:
        weight: 1.0
        # Each pair names two entries of Arch.models.
        model_name_pairs:
          - ["Student", "Teacher"]
  Eval:
    # NOTE(review): a plain CELoss is used here although the model is a
    # distillation wrapper - confirm it accepts that model's output.
    - CELoss:
        weight: 1.0
|
||
|
|
||
|
|
||
|
Optimizer:
  # Momentum optimizer with a cosine schedule and L2 regularization.
  name: Momentum
  momentum: 0.9
  lr:
    name: Cosine
    learning_rate: 0.2
    warmup_epoch: 5
  regularizer:
    name: 'L2'
    coeff: 0.00004
|
||
|
|
||
|
|
||
|
# data loader for train and eval
|
||
|
DataLoader:
  # Input pipelines: dataset + per-sample transforms, batch sampler, loader.
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
        - RandFlipImage:
            flip_code: 1
        - NormalizeImage:
            # YAML loads this plain scalar as the string "1.0/255.0", not a
            # float; the consumer has to evaluate the expression itself.
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''

    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: True
    loader:
      num_workers: 8
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      # Evaluation must read the held-out list; this previously pointed at
      # train_list.txt, which would score the model on its training data.
      # Assumes val_list.txt sits next to train_list.txt - confirm.
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 256
        - CropImage:
            size: 224
        - NormalizeImage:
            # Same string-valued scale expression as in the Train pipeline.
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 256
      drop_last: False
      shuffle: False
    loader:
      num_workers: 8
      use_shared_memory: True
|
||
|
|
||
|
Infer:
  # Single-image inference: input path, preprocessing chain, post-processing.
  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
    - CropImage:
        size: 224
    - NormalizeImage:
        # Loaded as the string "1.0/255.0"; the consumer evaluates it.
        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        order: ''
    # No arguments are given for this op, so its value is null.
    - ToCHWImage: null
  PostProcess:
    name: Topk
    topk: 5
    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||
|
|
||
|
Metric:
  # Both phases use the same metric entry, keyed on the "Student" model
  # with topk values 1 and 5.
  Train:
    - DistillationTopkAcc:
        model_key: "Student"
        topk: [1, 5]
  Eval:
    - DistillationTopkAcc:
        model_key: "Student"
        topk: [1, 5]
|