diff --git a/configs/DataAugment/ResNet50_AutoAugment.yaml b/configs/DataAugment/ResNet50_AutoAugment.yaml new file mode 100644 index 000000000..44bbe631a --- /dev/null +++ b/configs/DataAugment/ResNet50_AutoAugment.yaml @@ -0,0 +1,75 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_Baseline.yaml b/configs/DataAugment/ResNet50_Baseline.yaml new file mode 100644 index 000000000..1d03ae37c --- /dev/null +++ b/configs/DataAugment/ResNet50_Baseline.yaml @@ -0,0 +1,74 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_Cutmix.yaml b/configs/DataAugment/ResNet50_Cutmix.yaml new file mode 100644 index 000000000..322fb35c4 --- /dev/null +++ b/configs/DataAugment/ResNet50_Cutmix.yaml @@ -0,0 +1,77 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - CutmixOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_Cutout.yaml b/configs/DataAugment/ResNet50_Cutout.yaml new file mode 100644 index 000000000..91cbe4c72 --- /dev/null +++ b/configs/DataAugment/ResNet50_Cutout.yaml @@ -0,0 +1,77 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - Cutout: + n_holes: 1 + length: 112 + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_GridMask.yaml b/configs/DataAugment/ResNet50_GridMask.yaml new file mode 100644 index 000000000..4d442a2e9 --- /dev/null +++ b/configs/DataAugment/ResNet50_GridMask.yaml @@ -0,0 +1,80 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - GridMask: + d1: 96 + d2: 224 + rotate: 1 + ratio: 0.5 + mode: 0 + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_HideAndSeek.yaml b/configs/DataAugment/ResNet50_HideAndSeek.yaml new file mode 100644 index 000000000..17f5f1eec --- /dev/null +++ b/configs/DataAugment/ResNet50_HideAndSeek.yaml @@ -0,0 +1,75 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - HideAndSeek: + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_Mixup.yaml b/configs/DataAugment/ResNet50_Mixup.yaml new file mode 100644 index 000000000..3e4105f7b --- /dev/null +++ b/configs/DataAugment/ResNet50_Mixup.yaml @@ -0,0 +1,77 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_RandAugment.yaml b/configs/DataAugment/ResNet50_RandAugment.yaml new file mode 100644 index 000000000..a3a34eceb --- /dev/null +++ b/configs/DataAugment/ResNet50_RandAugment.yaml @@ -0,0 +1,77 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - RandAugment: + num_layers: 2 + magnitude: 5 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DataAugment/ResNet50_RandomErasing.yaml b/configs/DataAugment/ResNet50_RandomErasing.yaml new file mode 100644 index 000000000..682cf95d6 --- /dev/null +++ b/configs/DataAugment/ResNet50_RandomErasing.yaml @@ -0,0 +1,80 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 300 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
+ - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Distillation/R50_vd_distill_MV3_large_x1_0.yaml b/configs/Distillation/R50_vd_distill_MV3_large_x1_0.yaml new file mode 100644 index 000000000..39c186e82 --- /dev/null +++ b/configs/Distillation/R50_vd_distill_MV3_large_x1_0.yaml @@ -0,0 +1,74 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50_vd_distill_MobileNetV3_large_x1_0' + +pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained/" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_distillation: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 1.3 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00001 + +TRAIN: + batch_size: 2048 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Distillation/ResX101_32x16d_wsl_distill_R50_vd.yaml b/configs/Distillation/ResX101_32x16d_wsl_distill_R50_vd.yaml new file mode 100644 index 000000000..9e5b060f1 --- /dev/null +++ b/configs/Distillation/ResX101_32x16d_wsl_distill_R50_vd.yaml @@ -0,0 +1,73 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNeXt101_32x16d_wsl_distill_ResNet50_vd' + +pretrained_model: "./pretrained/ResNeXt101_32x16d_wsl_pretrained/" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_distillation: True + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.4 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00007 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/docs/zh_CN/advanced_tutorials/distillation/distillation.md b/docs/zh_CN/advanced_tutorials/distillation/distillation.md index cf5c2c2a2..6e29702c9 100644 --- a/docs/zh_CN/advanced_tutorials/distillation/distillation.md +++ b/docs/zh_CN/advanced_tutorials/distillation/distillation.md @@ -185,9 +185,73 @@ for var in ./*_student; do cp "$var" "../student_model/${var%_student}"; done # # 五、SSLD实战 -* 该部分内容正在持续更新中,敬请期待。 +本节简要介绍了SSLD的实战内容,用户可以通过此处的配置启动SSLD蒸馏任务。 +## 5.1 参数配置 +实战部分提供了SSLD蒸馏的示例,在`ppcls/modeling/architectures/distillation_models.py`中提供了`ResNeXt101_32x16d_wsl`蒸馏`ResNet50_vd`与`ResNet50_vd_ssld`蒸馏`MobileNetV3_large_x1_0`的示例,`configs/Distillation`里分别提供了二者的配置文件,用户可以在`tools/run.sh`里直接替换配置文件的路径即可使用。 + +### ResNeXt101_32x16d_wsl蒸馏ResNet50_vd + +`ResNeXt101_32x16d_wsl`蒸馏`ResNet50_vd`的配置如下,其中`pretrained_model`指定了`ResNeXt101_32x16d_wsl`(教师模型)的预训练模型的路径,该路径也可以同时指定教师模型与学生模型的预训练模型的路径,用户只需要同时传入二者预训练的路径即可(配置中的注释部分)。 + +```yaml +ARCHITECTURE: + name: 'ResNeXt101_32x16d_wsl_distill_ResNet50_vd' +pretrained_model: "./pretrained/ResNeXt101_32x16d_wsl_pretrained/" +# pretrained_model: +# - "./pretrained/ResNeXt101_32x16d_wsl_pretrained/" +# - "./pretrained/ResNet50_vd_pretrained/" +use_distillation: True +``` + +### ResNet50_vd_ssld蒸馏MobileNetV3_large_x1_0 + +类似于`ResNeXt101_32x16d_wsl`蒸馏`ResNet50_vd`,`ResNet50_vd_ssld`蒸馏`MobileNetV3_large_x1_0`的配置如下: + +```yaml +ARCHITECTURE: + name: 'ResNet50_vd_distill_MobileNetV3_large_x1_0' +pretrained_model: 
"./pretrained/ResNet50_vd_ssld_pretrained/" +# pretrained_model: +# - "./pretrained/ResNet50_vd_ssld_pretrained/" +# - "./pretrained/MobileNetV3_large_x1_0_pretrained/" +use_distillation: True +``` + +## 5.2 启动命令 + +当用户配置完训练环境后,类似于训练其他分类任务,只需要将`tools/run.sh`中的配置文件替换成为相应的蒸馏配置文件即可。 + +其中`run.sh`中的内容如下: + +```bash +export PYTHONPATH=path_to_PaddleClas:$PYTHONPATH + +python -m paddle.distributed.launch \ + --selected_gpus="0,1,2,3" \ + --log_dir=R50_vd_distill_MV3_large_x1_0 \ + tools/train.py \ + -c ./configs/Distillation/R50_vd_distill_MV3_large_x1_0.yaml +``` + +运行`run.sh` + +```bash +sh tools/run.sh +``` + +## 5.3 注意事项 + +* 用户在使用SSLD蒸馏之前,首先需要在目标数据集上训练一个教师模型,该教师模型用于指导学生模型在该数据集上的训练。 + +* 在用户使用SSLD蒸馏的时候需要将配置文件中的`use_distillation`设置为`True`,另外由于学生模型学习带有知识信息的soft-label,所以需要关掉label_smoothing选项,即将`ls_epsilon`中的值设置在[0,1]之外。 + +* 如果学生模型没有加载预训练模型,训练的其他超参数可以参考该学生模型在ImageNet-1k上训练的超参数,如果学生模型加载了预训练模型,学习率可以调整到原来的1/10或者1/100。 + +* 在SSLD蒸馏的过程中,学生模型只学习soft-label导致训练目标变得更加复杂,建议可以适当地调小`l2_decay`的值来获得更高的验证集准确率。 + +* 若用户准备添加无标签的训练数据,只需要将新的训练数据放置在原本训练数据的路径下,生成新的数据list即可,另外,新生成的数据list需要将无标签的数据添加伪标签(只是为了统一读数据)。 **此处插播一条硬广~** > 如果您觉得此文档对您有帮助,欢迎star、watch、fork,三连我们的项目:[https://github.com/PaddlePaddle/PaddleClas](https://github.com/PaddlePaddle/PaddleClas) diff --git a/docs/zh_CN/advanced_tutorials/image_augmentation/ImageAugment.md b/docs/zh_CN/advanced_tutorials/image_augmentation/ImageAugment.md index 2fb872990..5e35dab34 100644 --- a/docs/zh_CN/advanced_tutorials/image_augmentation/ImageAugment.md +++ b/docs/zh_CN/advanced_tutorials/image_augmentation/ImageAugment.md @@ -438,9 +438,118 @@ new_batch = cutmix_op(batch) # 七、数据增广分类实战 -* 该部分内容正在持续更新中,敬请期待。 +本节列举了八类数据增广的参数配置、启动命令以及使用过程的注意事项等。 +## 7.1 参数配置 +由于不同的数据增广方式含有不同的超参数,为了便于理解和使用,我们在`configs/DataAugment`里分别列举了8种训练ResNet50的数据增广方式的参数配置文件,用户可以在`tools/run.sh`里直接替换配置文件的路径即可使用。此处分别挑选了图像变换、图像裁剪、图像混叠中的一个示例展示,其他参数配置用户可以自查配置文件。 + +### RandAugment + 
+`RandAugment`的图像增广方式的配置如下,其中用户需要指定其中的参数`num_layers`与`magnitude`,默认的数值分别是`2`和`5`。`RandAugment`是在uint8的数据格式上转换的,所以其处理过程应该放在归一化操作(`NormalizeImage`)之前。 + +```yaml + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - RandAugment: + num_layers: 2 + magnitude: 5 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: +``` + +### Cutout + +`Cutout`的图像增广方式的配置如下,其中用户需要指定其中的参数`n_holes`与`length`,默认的数值分别是`1`和`112`。类似其他图像裁剪类的数据增广方式,`Cutout`既可以在uint8格式的数据上操作,也可以在归一化(`NormalizeImage`)后的数据上操作,此处给出的是在归一化后的操作。 + +```yaml + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - Cutout: + n_holes: 1 + length: 112 + - ToCHWImage: +``` + +### Mixup + +`Mixup`的图像增广方式的配置如下,其中用户需要指定其中的参数`alpha`,默认的数值是`0.2`。类似其他图像混合类的数据增广方式,`Mixup`是在图像做完数据处理后将每个batch内的数据做图像混叠,将混叠后的图像和标签输入网络中训练,所以其是在图像数据处理(图像变换、图像裁剪)后操作。另外,在配置文件中,需要将`use_mix`参数设置为`True`。 + +```yaml + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 +``` + +## 7.2 启动命令 + +当用户配置完训练环境后,类似于训练其他分类任务,只需要将`tools/run.sh`中的配置文件替换成为相应的数据增广方式的配置文件即可。 + +其中`run.sh`中的内容如下: + +```bash +export PYTHONPATH=path_to_PaddleClas:$PYTHONPATH + +python -m paddle.distributed.launch \ + --selected_gpus="0,1,2,3" \ + --log_dir=ResNet50_Cutout \ + tools/train.py \ + -c ./configs/DataAugment/ResNet50_Cutout.yaml +``` + +运行`run.sh` + +```bash +sh tools/run.sh +``` + +## 7.3 注意事项 + +* 在使用图像混叠类的数据处理时,需要将配置文件中的`use_mix`设置为`True`,另外由于图像混叠时需对label进行混叠,无法计算训练数据的准确率,所以在训练过程中没有打印训练准确率。 + +* 在使用数据增广后,由于训练数据更难,所以训练损失函数可能较大,训练集的准确率相对较低,但其拥有更好的泛化能力,所以验证集的准确率相对较高。 + +* 在使用数据增广后,模型可能会趋于欠拟合状态,建议可以适当地调小`l2_decay`的值来获得更高的验证集准确率。 + +* 几乎每一类图像增广均含有超参数,我们只提供了基于ImageNet-1k的超参数,其他数据集需要用户自己调试超参数,具体超参数的含义用户可以阅读相关的论文,调试方法也可以参考训练技巧的章节。 **此处插播一条硬广~** > 如果您觉得此文档对您有帮助,欢迎star、watch、fork,三连我们的项目:[https://github.com/PaddlePaddle/PaddleClas](https://github.com/PaddlePaddle/PaddleClas) diff --git a/ppcls/data/imaug/__init__.py b/ppcls/data/imaug/__init__.py index 55be7a373..6860382bc 100644 --- a/ppcls/data/imaug/__init__.py +++ b/ppcls/data/imaug/__init__.py @@ -44,12 +44,12 @@ def transform(data, ops=[]): return data -class ImageNetPolicy(RawImageNetPolicy): +class AutoAugment(RawImageNetPolicy): """ ImageNetPolicy wrapper to auto fit different img types """ def __init__(self, *args, **kwargs): if six.PY2: - super(ImageNetPolicy, self).__init__(*args, **kwargs) + super(AutoAugment, self).__init__(*args, **kwargs) else: super().__init__(*args, **kwargs) @@ -59,7 +59,7 @@ class ImageNetPolicy(RawImageNetPolicy): img = Image.fromarray(img) if six.PY2: - img = super(ImageNetPolicy, self).__call__(img) + img = super(AutoAugment, self).__call__(img) else: img = super().__call__(img) diff --git a/ppcls/data/imaug/grid.py b/ppcls/data/imaug/grid.py index 437dbc09d..93e0c58ac 100644 --- a/ppcls/data/imaug/grid.py +++ b/ppcls/data/imaug/grid.py @@ -25,7 
+25,7 @@ NUM_EPOCHS = 240 class GridMask(object): - def __init__(self, d1, d2, rotate=1, ratio=0.5, mode=0, prob=1.): + def __init__(self, d1=96, d2=224, rotate=1, ratio=0.5, mode=0, prob=1.): self.d1 = d1 self.d2 = d2 self.rotate = rotate diff --git a/ppcls/data/imaug/randaugment.py b/ppcls/data/imaug/randaugment.py index 959c7b185..3ccf75757 100644 --- a/ppcls/data/imaug/randaugment.py +++ b/ppcls/data/imaug/randaugment.py @@ -20,7 +20,7 @@ import random class RandAugment(object): - def __init__(self, num_layers, magnitude, fillcolor=(128, 128, 128)): + def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)): self.num_layers = num_layers self.magnitude = magnitude self.max_level = 10