diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..3904a7ed8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*.pyc +*.sw* +*log* +/dataset +checkpoints/ +pretrained/ +*.ipynb* +build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..1ab8d75f0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +- repo: https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 + hooks: + - id: yapf + files: \.py$ +- repo: https://github.com/pre-commit/pre-commit-hooks + sha: a11d9314b22d8f8c7556443875b731ef05965464 + hooks: + - id: check-merge-conflict + - id: check-symlinks + - id: detect-private-key + files: (?!.*paddle)^.*$ + - id: end-of-file-fixer + files: \.(md|yml)$ + - id: trailing-whitespace + files: \.(md|yml)$ +- repo: https://github.com/Lucas-C/pre-commit-hooks + sha: v1.0.1 + hooks: + - id: forbid-crlf + files: \.(md|yml)$ + - id: remove-crlf + files: \.(md|yml)$ + - id: forbid-tabs + files: \.(md|yml)$ + - id: remove-tabs + files: \.(md|yml)$ diff --git a/configs/AlexNet/AlexNet.yaml b/configs/AlexNet/AlexNet.yaml new file mode 100644 index 000000000..fa46e6804 --- /dev/null +++ b/configs/AlexNet/AlexNet.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "AlexNet" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.01 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - 
RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/DPN/DPN107.yaml b/configs/DPN/DPN107.yaml new file mode 100644 index 000000000..d44418fe7 --- /dev/null +++ b/configs/DPN/DPN107.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'DPN107' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DPN/DPN131.yaml b/configs/DPN/DPN131.yaml new file mode 100644 index 000000000..95b6345a7 --- /dev/null +++ b/configs/DPN/DPN131.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'DPN131' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DPN/DPN68.yaml b/configs/DPN/DPN68.yaml new file mode 100644 index 000000000..e1fa30b43 --- /dev/null +++ b/configs/DPN/DPN68.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'DPN68' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DPN/DPN92.yaml b/configs/DPN/DPN92.yaml new file mode 100644 index 000000000..ecfae64dc --- /dev/null +++ b/configs/DPN/DPN92.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'DPN92' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DPN/DPN98.yaml b/configs/DPN/DPN98.yaml new file mode 100644 index 000000000..51dab0ac6 --- /dev/null +++ b/configs/DPN/DPN98.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'DPN98' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DarkNet/DarkNet53.yaml b/configs/DarkNet/DarkNet53.yaml new file mode 100644 index 000000000..e3610d9a4 --- /dev/null +++ b/configs/DarkNet/DarkNet53.yaml @@ -0,0 +1,71 @@ +mode: 'train' +architecture: "DarkNet53" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 256, 256] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 256 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/DenseNet/DenseNet121.yaml b/configs/DenseNet/DenseNet121.yaml new file mode 100644 index 000000000..2b13cacb7 --- /dev/null +++ b/configs/DenseNet/DenseNet121.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'DenseNet121' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DenseNet/DenseNet161.yaml b/configs/DenseNet/DenseNet161.yaml new file mode 100644 index 000000000..c69376126 --- /dev/null +++ b/configs/DenseNet/DenseNet161.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'DenseNet161' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DenseNet/DenseNet169.yaml b/configs/DenseNet/DenseNet169.yaml new file mode 100644 index 000000000..d8f80309b --- /dev/null +++ b/configs/DenseNet/DenseNet169.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'DenseNet169' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DenseNet/DenseNet201.yaml b/configs/DenseNet/DenseNet201.yaml new file mode 100644 index 000000000..c848de2dd --- /dev/null +++ b/configs/DenseNet/DenseNet201.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'DenseNet201' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/DenseNet/DenseNet264.yaml b/configs/DenseNet/DenseNet264.yaml new file mode 100644 index 000000000..4794be4ee --- /dev/null +++ b/configs/DenseNet/DenseNet264.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'DenseNet264' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W18_C.yaml b/configs/HRNet/HRNet_W18_C.yaml new file mode 100644 index 000000000..d74e16318 --- /dev/null +++ b/configs/HRNet/HRNet_W18_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W18_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W30_C.yaml b/configs/HRNet/HRNet_W30_C.yaml new file mode 100644 index 000000000..c270a7f49 --- /dev/null +++ b/configs/HRNet/HRNet_W30_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W30_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W32_C.yaml b/configs/HRNet/HRNet_W32_C.yaml new file mode 100644 index 000000000..97f748f01 --- /dev/null +++ b/configs/HRNet/HRNet_W32_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W32_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W40_C.yaml b/configs/HRNet/HRNet_W40_C.yaml new file mode 100644 index 000000000..cf6d3e81e --- /dev/null +++ b/configs/HRNet/HRNet_W40_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W40_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W44_C.yaml b/configs/HRNet/HRNet_W44_C.yaml new file mode 100644 index 000000000..2e435a635 --- /dev/null +++ b/configs/HRNet/HRNet_W44_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W44_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W48_C.yaml b/configs/HRNet/HRNet_W48_C.yaml new file mode 100644 index 000000000..b63341d7c --- /dev/null +++ b/configs/HRNet/HRNet_W48_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W48_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/HRNet/HRNet_W64_C.yaml b/configs/HRNet/HRNet_W64_C.yaml new file mode 100644 index 000000000..8684664ab --- /dev/null +++ b/configs/HRNet/HRNet_W64_C.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'HRNet_W64_C' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Inception/GoogLeNet.yaml b/configs/Inception/GoogLeNet.yaml new file mode 100644 index 000000000..795bbcb13 --- /dev/null +++ b/configs/Inception/GoogLeNet.yaml @@ -0,0 +1,69 @@ +mode: 'train' +architecture: "GoogLeNet" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.01 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Inception/InceptionV4.yaml b/configs/Inception/InceptionV4.yaml new file mode 100644 index 000000000..65c73264f --- /dev/null +++ b/configs/Inception/InceptionV4.yaml @@ -0,0 +1,77 @@ +mode: 'train' +architecture: 'InceptionV4' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 299, 299] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00010 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + + + +VALID: + batch_size: 16 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/MobileNetV1/MobileNetV1.yaml b/configs/MobileNetV1/MobileNetV1.yaml new file mode 100644 index 000000000..ff2b62b31 --- /dev/null +++ b/configs/MobileNetV1/MobileNetV1.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV1" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV1/MobileNetV1_x0_25.yaml b/configs/MobileNetV1/MobileNetV1_x0_25.yaml new file mode 100644 index 000000000..12943b750 --- /dev/null +++ b/configs/MobileNetV1/MobileNetV1_x0_25.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV1_x0_25" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV1/MobileNetV1_x0_5.yaml b/configs/MobileNetV1/MobileNetV1_x0_5.yaml new file mode 100644 index 000000000..14baaf658 --- /dev/null +++ b/configs/MobileNetV1/MobileNetV1_x0_5.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV1_x0_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV1/MobileNetV1_x0_75.yaml b/configs/MobileNetV1/MobileNetV1_x0_75.yaml new file mode 100644 index 000000000..3563e1d95 --- /dev/null +++ b/configs/MobileNetV1/MobileNetV1_x0_75.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV1_x0_75" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV2/MobileNetV2.yaml b/configs/MobileNetV2/MobileNetV2.yaml new file mode 100644 index 000000000..ba25a430d --- /dev/null +++ b/configs/MobileNetV2/MobileNetV2.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "MobileNetV2" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV2/MobileNetV2_x0_25.yaml b/configs/MobileNetV2/MobileNetV2_x0_25.yaml new file mode 100644 index 000000000..25957a91b --- /dev/null +++ b/configs/MobileNetV2/MobileNetV2_x0_25.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV2_x0_25" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + ratio: [1.0, 1.0] + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV2/MobileNetV2_x0_5.yaml b/configs/MobileNetV2/MobileNetV2_x0_5.yaml new file mode 100644 index 000000000..4591353e6 --- /dev/null +++ b/configs/MobileNetV2/MobileNetV2_x0_5.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV2_x0_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + ratio: [1.0, 1.0] + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV2/MobileNetV2_x0_75.yaml b/configs/MobileNetV2/MobileNetV2_x0_75.yaml new file mode 100644 index 000000000..757c87831 --- /dev/null +++ b/configs/MobileNetV2/MobileNetV2_x0_75.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "MobileNetV2_x0_75" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV2/MobileNetV2_x1_5.yaml b/configs/MobileNetV2/MobileNetV2_x1_5.yaml new file mode 100644 index 000000000..f23634721 --- /dev/null +++ b/configs/MobileNetV2/MobileNetV2_x1_5.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "MobileNetV2_x1_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV2/MobileNetV2_x2_0.yaml b/configs/MobileNetV2/MobileNetV2_x2_0.yaml new file mode 100644 index 000000000..39996f76f --- /dev/null +++ b/configs/MobileNetV2/MobileNetV2_x2_0.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "MobileNetV2_x2_0" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.045 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml b/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml new file mode 100644 index 000000000..bc27a07f3 --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_large_x0_35.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_large_x0_35" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 2.6 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 4096 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml b/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml new file mode 100644 index 000000000..1aa847924 --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_large_x0_5.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_large_x0_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 1.3 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 2048 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml b/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml new file mode 100644 index 000000000..3d859e2dd --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_large_x0_75.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_large_x0_75" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 1.3 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 2048 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml b/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml new file mode 100644 index 000000000..32d0fe6fa --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_large_x1_0.yaml @@ -0,0 +1,76 @@ +mode: 'train' +architecture: "MobileNetV3_large_x1_0" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 2.6 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 4096 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - ImageNetPolicy: + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 32 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml b/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml new file mode 100644 index 000000000..a368b8d02 --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_large_x1_25.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_large_x1_25" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.65 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml b/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml new file mode 100644 index 000000000..7fee09f2c --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_small_x0_35.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "MobileNetV3_small_x0_35" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 2.6 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00001 + +TRAIN: + batch_size: 4096 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml b/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml new file mode 100644 index 000000000..4659bd52f --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_small_x0_5.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_small_x0_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 2.6 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00001 + +TRAIN: + batch_size: 4096 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml b/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml new file mode 100644 index 000000000..23d13b0ef --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_small_x0_75.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_small_x0_75" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 2.6 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 4096 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml b/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml new file mode 100644 index 000000000..f6369ec32 --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_small_x1_0.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_small_x1_0" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 2.6 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 4096 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml b/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml new file mode 100644 index 000000000..cb711f845 --- /dev/null +++ b/configs/MobileNetV3/MobileNetV3_small_x1_25.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "MobileNetV3_small_x1_25" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +ls_epsilon: 0.1 +validate: True +valid_interval: 1 +epochs: 360 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 1.3 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00002 + +TRAIN: + batch_size: 2048 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/Res2Net/Res2Net101_vd_26w_4s.yaml b/configs/Res2Net/Res2Net101_vd_26w_4s.yaml new file mode 100644 index 000000000..2d5cecfd9 --- /dev/null +++ b/configs/Res2Net/Res2Net101_vd_26w_4s.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'Res2Net101_vd_26w_4s' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Res2Net/Res2Net200_vd_26w_4s.yaml b/configs/Res2Net/Res2Net200_vd_26w_4s.yaml new file mode 100644 index 000000000..5cd51eb7b --- /dev/null +++ b/configs/Res2Net/Res2Net200_vd_26w_4s.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'Res2Net200_vd_26w_4s' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Res2Net/Res2Net50_14w_8s.yaml b/configs/Res2Net/Res2Net50_14w_8s.yaml new file mode 100644 index 000000000..69d249670 --- /dev/null +++ b/configs/Res2Net/Res2Net50_14w_8s.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'Res2Net50_14w_8s' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Res2Net/Res2Net50_26w_4s.yaml b/configs/Res2Net/Res2Net50_26w_4s.yaml new file mode 100644 index 000000000..2565bfcb1 --- /dev/null +++ b/configs/Res2Net/Res2Net50_26w_4s.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'Res2Net50_26w_4s' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/Res2Net/Res2Net50_vd_26w_4s.yaml b/configs/Res2Net/Res2Net50_vd_26w_4s.yaml new file mode 100644 index 000000000..9aa79156c --- /dev/null +++ b/configs/Res2Net/Res2Net50_vd_26w_4s.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'Res2Net50_vd_26w_4s' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt101_32x4d.yaml b/configs/ResNeXt/ResNeXt101_32x4d.yaml new file mode 100644 index 000000000..08c364894 --- /dev/null +++ b/configs/ResNeXt/ResNeXt101_32x4d.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNeXt101_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt101_64x4d.yaml b/configs/ResNeXt/ResNeXt101_64x4d.yaml new file mode 100644 index 000000000..8a662284f --- /dev/null +++ b/configs/ResNeXt/ResNeXt101_64x4d.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNeXt101_64x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000150 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt101_vd_32x4d.yaml b/configs/ResNeXt/ResNeXt101_vd_32x4d.yaml new file mode 100644 index 000000000..4a70e2e02 --- /dev/null +++ b/configs/ResNeXt/ResNeXt101_vd_32x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNeXt101_vd_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt101_vd_64x4d.yaml b/configs/ResNeXt/ResNeXt101_vd_64x4d.yaml new file mode 100644 index 000000000..1587b425d --- /dev/null +++ b/configs/ResNeXt/ResNeXt101_vd_64x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNeXt101_vd_64x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt152_32x4d.yaml b/configs/ResNeXt/ResNeXt152_32x4d.yaml new file mode 100644 index 000000000..d073066eb --- /dev/null +++ b/configs/ResNeXt/ResNeXt152_32x4d.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNeXt152_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt152_64x4d.yaml b/configs/ResNeXt/ResNeXt152_64x4d.yaml new file mode 100644 index 000000000..4cf492e25 --- /dev/null +++ b/configs/ResNeXt/ResNeXt152_64x4d.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNeXt152_64x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000180 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt152_vd_32x4d.yaml b/configs/ResNeXt/ResNeXt152_vd_32x4d.yaml new file mode 100644 index 000000000..5d89b7e24 --- /dev/null +++ b/configs/ResNeXt/ResNeXt152_vd_32x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNeXt152_vd_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt152_vd_64x4d.yaml b/configs/ResNeXt/ResNeXt152_vd_64x4d.yaml new file mode 100644 index 000000000..877f4b6c9 --- /dev/null +++ b/configs/ResNeXt/ResNeXt152_vd_64x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNeXt152_vd_64x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt50_32x4d.yaml b/configs/ResNeXt/ResNeXt50_32x4d.yaml new file mode 100644 index 000000000..f8a7e8dd9 --- /dev/null +++ b/configs/ResNeXt/ResNeXt50_32x4d.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNeXt50_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNeXt/ResNeXt50_64x4d.yaml b/configs/ResNeXt/ResNeXt50_64x4d.yaml new file mode 100644 index 000000000..4a5bf99da --- /dev/null +++ b/configs/ResNeXt/ResNeXt50_64x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "ResNeXt50_64x4d" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 32 + num_workers: 8 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ResNeXt/ResNeXt50_vd_32x4d.yaml b/configs/ResNeXt/ResNeXt50_vd_32x4d.yaml new file mode 100644 index 000000000..b779b0524 --- /dev/null +++ b/configs/ResNeXt/ResNeXt50_vd_32x4d.yaml @@ -0,0 +1,80 @@ +mode: 'train' +architecture: "ResNeXt50_vd_32x4d" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ResNeXt/ResNeXt50_vd_64x4d.yaml b/configs/ResNeXt/ResNeXt50_vd_64x4d.yaml new file mode 100644 index 000000000..b79a63513 --- /dev/null +++ b/configs/ResNeXt/ResNeXt50_vd_64x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNeXt50_vd_64x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet101.yaml b/configs/ResNet/ResNet101.yaml new file mode 100644 index 000000000..0ccbb13e4 --- /dev/null +++ b/configs/ResNet/ResNet101.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNet101' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet101_vd.yaml b/configs/ResNet/ResNet101_vd.yaml new file mode 100644 index 000000000..c74b5b20b --- /dev/null +++ b/configs/ResNet/ResNet101_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNet101_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet152.yaml b/configs/ResNet/ResNet152.yaml new file mode 100644 index 000000000..c7934a040 --- /dev/null +++ b/configs/ResNet/ResNet152.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNet152' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet152_vd.yaml b/configs/ResNet/ResNet152_vd.yaml new file mode 100644 index 000000000..fbf08ede3 --- /dev/null +++ b/configs/ResNet/ResNet152_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNet152_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet18.yaml b/configs/ResNet/ResNet18.yaml new file mode 100644 index 000000000..270cd8ed3 --- /dev/null +++ b/configs/ResNet/ResNet18.yaml @@ -0,0 +1,72 @@ +mode: 'train' +architecture: 'ResNet18' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet18_vd.yaml b/configs/ResNet/ResNet18_vd.yaml new file mode 100644 index 000000000..54f36b0f4 --- /dev/null +++ b/configs/ResNet/ResNet18_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNet18_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000070 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet200_vd.yaml b/configs/ResNet/ResNet200_vd.yaml new file mode 100644 index 000000000..f2004fdef --- /dev/null +++ b/configs/ResNet/ResNet200_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNet200_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet34.yaml b/configs/ResNet/ResNet34.yaml new file mode 100644 index 000000000..cfe715d87 --- /dev/null +++ b/configs/ResNet/ResNet34.yaml @@ -0,0 +1,72 @@ +mode: 'train' +architecture: 'ResNet34' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet34_vd.yaml b/configs/ResNet/ResNet34_vd.yaml new file mode 100644 index 000000000..39b9a3556 --- /dev/null +++ b/configs/ResNet/ResNet34_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNet34_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000070 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet50.yaml b/configs/ResNet/ResNet50.yaml new file mode 100644 index 000000000..1fb825b58 --- /dev/null +++ b/configs/ResNet/ResNet50.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: 'ResNet50' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet50_vc.yaml b/configs/ResNet/ResNet50_vc.yaml new file mode 100644 index 000000000..233f00ce1 --- /dev/null +++ b/configs/ResNet/ResNet50_vc.yaml @@ -0,0 +1,72 @@ +mode: 'train' +architecture: 'ResNet50_vc' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet/ResNet50_vd.yaml b/configs/ResNet/ResNet50_vd.yaml new file mode 100644 index 000000000..dbb52e32b --- /dev/null +++ b/configs/ResNet/ResNet50_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'ResNet50_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000070 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ResNet_ACNet/ResNet_ACNet.yaml b/configs/ResNet_ACNet/ResNet_ACNet.yaml new file mode 100644 index 000000000..309f3821e --- /dev/null +++ b/configs/ResNet_ACNet/ResNet_ACNet.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "ResNet_ACNet" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/SENet/SENet154_vd.yaml b/configs/SENet/SENet154_vd.yaml new file mode 100644 index 000000000..72adc722f --- /dev/null +++ b/configs/SENet/SENet154_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'SENet154_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/SENet/SE_ResNeXt101_32x4d.yaml b/configs/SENet/SE_ResNeXt101_32x4d.yaml new file mode 100644 index 000000000..bd9f20e7b --- /dev/null +++ b/configs/SENet/SE_ResNeXt101_32x4d.yaml @@ -0,0 +1,72 @@ +mode: 'train' +architecture: 'SE_ResNeXt101_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000015 + +TRAIN: + batch_size: 400 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/SENet/SE_ResNeXt50_32x4d.yaml b/configs/SENet/SE_ResNeXt50_32x4d.yaml new file mode 100644 index 000000000..c2a766013 --- /dev/null +++ b/configs/SENet/SE_ResNeXt50_32x4d.yaml @@ -0,0 +1,72 @@ +mode: 'train' +architecture: 'SE_ResNeXt50_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000120 + +TRAIN: + batch_size: 400 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/SENet/SE_ResNeXt50_vd_32x4d.yaml b/configs/SENet/SE_ResNeXt50_vd_32x4d.yaml new file mode 100644 index 000000000..f0adfb4b1 --- /dev/null +++ b/configs/SENet/SE_ResNeXt50_vd_32x4d.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'SE_ResNeXt50_vd_32x4d' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/SENet/SE_ResNet18_vd.yaml b/configs/SENet/SE_ResNet18_vd.yaml new file mode 100644 index 000000000..9684c6b51 --- /dev/null +++ b/configs/SENet/SE_ResNet18_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'SE_ResNet18_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000070 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/SENet/SE_ResNet34_vd.yaml b/configs/SENet/SE_ResNet34_vd.yaml new file mode 100644 index 000000000..1ffe543dd --- /dev/null +++ b/configs/SENet/SE_ResNet34_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'SE_ResNet34_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000070 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/SENet/SE_ResNet50_vd.yaml b/configs/SENet/SE_ResNet50_vd.yaml new file mode 100644 index 000000000..8ca11f271 --- /dev/null +++ b/configs/SENet/SE_ResNet50_vd.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: 'SE_ResNet50_vd' +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 200 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: True +ls_epsilon: 0.1 + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + mix: + - MixupOperator: + alpha: 0.2 + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/ShuffleNet/ShuffleNetV2.yaml b/configs/ShuffleNet/ShuffleNetV2.yaml new file mode 100644 index 000000000..5993e3f8c --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "ShuffleNetV2" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.5 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ShuffleNet/ShuffleNetV2_swish.yaml b/configs/ShuffleNet/ShuffleNetV2_swish.yaml new file mode 100644 index 000000000..e8ee5f446 --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2_swish.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "ShuffleNetV2_swish" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.5 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml b/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml new file mode 100644 index 000000000..9ab65f0d8 --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2_x0_25.yaml @@ -0,0 +1,76 @@ +mode: 'train' +architecture: "ShuffleNetV2_x0_25" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.5 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + scale: [0.64, 1.0] + ratio: [0.8, 1.2] + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml b/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml new file mode 100644 index 000000000..134d5b9f3 --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2_x0_33.yaml @@ -0,0 +1,76 @@ +mode: 'train' +architecture: "ShuffleNetV2_x0_33" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.5 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + scale: [0.64, 1.0] + ratio: [0.8, 1.2] + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml b/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml new file mode 100644 index 000000000..120ea1c11 --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2_x0_5.yaml @@ -0,0 +1,76 @@ +mode: 'train' +architecture: "ShuffleNetV2_x0_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.5 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00003 + +TRAIN: + batch_size: 1024 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + scale: [0.64, 1.0] + ratio: [0.8, 1.2] + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml b/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml new file mode 100644 index 000000000..c1fc3d18c --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2_x1_5.yaml @@ -0,0 +1,75 @@ +mode: 'train' +architecture: "ShuffleNetV2_x1_5" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.25 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 512 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + ratio: [1.0, 1.0] + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml b/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml new file mode 100644 index 000000000..b45b70b64 --- /dev/null +++ b/configs/ShuffleNet/ShuffleNetV2_x2_0.yaml @@ -0,0 +1,74 @@ +mode: 'train' +architecture: "ShuffleNetV2_x2_0" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 240 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'CosineWarmup' + params: + lr: 0.25 + warmup_epoch: 5 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00004 + +TRAIN: + batch_size: 512 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/SqueezeNet/SqueezeNet1_0.yaml b/configs/SqueezeNet/SqueezeNet1_0.yaml new file mode 100644 index 000000000..163bb33aa --- /dev/null +++ b/configs/SqueezeNet/SqueezeNet1_0.yaml @@ -0,0 +1,71 @@ +mode: 'train' +architecture: "SqueezeNet1_0" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.02 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/SqueezeNet/SqueezeNet1_1.yaml b/configs/SqueezeNet/SqueezeNet1_1.yaml new file mode 100644 index 000000000..4b716bc59 --- /dev/null +++ b/configs/SqueezeNet/SqueezeNet1_1.yaml @@ -0,0 +1,69 @@ +mode: 'train' +architecture: "SqueezeNet1_1" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.02 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0001 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/VGG/VGG11.yaml b/configs/VGG/VGG11.yaml new file mode 100644 index 000000000..d1cd5fab1 --- /dev/null +++ b/configs/VGG/VGG11.yaml @@ -0,0 +1,69 @@ +mode: 'train' +architecture: "VGG11" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 90 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0004 + +TRAIN: + batch_size: 512 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/configs/VGG/VGG13.yaml b/configs/VGG/VGG13.yaml new file mode 100644 index 000000000..732695a15 --- /dev/null +++ b/configs/VGG/VGG13.yaml @@ -0,0 +1,73 @@ +mode: 'train' +architecture: "VGG13" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 90 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.01 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0003 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/VGG/VGG16.yaml b/configs/VGG/VGG16.yaml new file mode 100644 index 000000000..78f46b7f9 --- /dev/null +++ b/configs/VGG/VGG16.yaml @@ -0,0 +1,73 @@ +mode: 'train' +architecture: "VGG16" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 90 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.01 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0004 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/VGG/VGG19.yaml b/configs/VGG/VGG19.yaml new file mode 100644 index 000000000..94aea056d --- /dev/null +++ b/configs/VGG/VGG19.yaml @@ -0,0 +1,49 @@ +mode: 'train' +architecture: "VGG19" +pretrained_model: "" +model_save_dir: "./checkpoints/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 150 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.01 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.0004 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + + diff --git a/configs/eval.yaml b/configs/eval.yaml new file mode 100644 index 000000000..8edec558b --- /dev/null +++ b/configs/eval.yaml @@ -0,0 +1,31 @@ +mode: 'valid' +architecture: "" +pretrained_model: "" +classes_num: 1000 +total_images: 1281167 +topk: 5 +image_shape: [3, 224, 224] + + +VALID: + batch_size: 16 + num_workers: 4 + file_list: "../dataset/ILSVRC2012/val_list.txt" + data_dir: "../dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + diff --git a/ppcls/__init__.py b/ppcls/__init__.py new file mode 100644 index 000000000..3cee44185 --- /dev/null +++ b/ppcls/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import optimizer + +from .modeling import * +from .optimizer import * +from .data import * +from .utils import * diff --git a/ppcls/data/__init__.py b/ppcls/data/__init__.py new file mode 100644 index 000000000..72779cb55 --- /dev/null +++ b/ppcls/data/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .reader import Reader diff --git a/ppcls/data/imaug/__init__.py b/ppcls/data/imaug/__init__.py new file mode 100644 index 000000000..55be7a373 --- /dev/null +++ b/ppcls/data/imaug/__init__.py @@ -0,0 +1,94 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .autoaugment import ImageNetPolicy as RawImageNetPolicy +from .randaugment import RandAugment as RawRandAugment +from .cutout import Cutout + +from .hide_and_seek import HideAndSeek +from .random_erasing import RandomErasing +from .grid import GridMask + +from .operators import DecodeImage +from .operators import ResizeImage +from .operators import CropImage +from .operators import RandCropImage +from .operators import RandFlipImage +from .operators import NormalizeImage +from .operators import ToCHWImage + +from .batch_operators import MixupOperator +from .batch_operators import CutmixOperator +from .batch_operators import FmixOperator + +import six +import numpy as np +from PIL import Image + + +def transform(data, ops=[]): + """ transform """ + for op in ops: + data = op(data) + return data + + +class ImageNetPolicy(RawImageNetPolicy): + """ ImageNetPolicy wrapper to auto fit different img types """ + + def __init__(self, *args, **kwargs): + if six.PY2: + super(ImageNetPolicy, self).__init__(*args, **kwargs) + else: + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + + if six.PY2: + img = super(ImageNetPolicy, self).__call__(img) + else: + img = super().__call__(img) + + if isinstance(img, Image.Image): + img = np.asarray(img) + + return img + + +class RandAugment(RawRandAugment): + """ RandAugment wrapper to auto fit different img types """ + + def __init__(self, *args, **kwargs): + if six.PY2: + super(RandAugment, self).__init__(*args, **kwargs) + else: + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + + if six.PY2: + img = super(RandAugment, self).__call__(img) + else: + img = super().__call__(img) + + if isinstance(img, Image.Image): + img = np.asarray(img) + + return img diff --git a/ppcls/data/imaug/autoaugment.py 
b/ppcls/data/imaug/autoaugment.py new file mode 100644 index 000000000..e241855ce --- /dev/null +++ b/ppcls/data/imaug/autoaugment.py @@ -0,0 +1,264 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py + +from PIL import Image, ImageEnhance, ImageOps +import numpy as np +import random + + +class ImageNetPolicy(object): + """ Randomly choose one of the best 24 Sub-policies on ImageNet. 
+ + Example: + >>> policy = ImageNetPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor), + SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor), + SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor), + SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor), + SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor), + SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor), + SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor), + SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor), + SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor), + SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor), + SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor) + ] + + def __call__(self, img, policy_idx=None): + if policy_idx is None or not isinstance(policy_idx, int): + 
policy_idx = random.randint(0, len(self.policies) - 1) + else: + policy_idx = policy_idx % len(self.policies) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment ImageNet Policy" + + +class CIFAR10Policy(object): + """ Randomly choose one of the best 25 Sub-policies on CIFAR10. + + Example: + >>> policy = CIFAR10Policy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> CIFAR10Policy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor), + SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor), + SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor), + SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor), + SubPolicy(0.2, "shearY", 7, 0.3, "posterize", 7, fillcolor), + SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor), + SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor), + SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor), + SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor), + SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor), + SubPolicy(0.4, "translateY", 3, 0.2, "sharpness", 6, fillcolor), + SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor), + SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor), + SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor), + SubPolicy(0.2, "equalize", 8, 0.8, "equalize", 4, fillcolor), + SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor), + SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor), + SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor), + SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor), + 
SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor), + SubPolicy(0.7, "translateY", 9, 0.9, "autocontrast", 1, fillcolor) + ] + + def __call__(self, img, policy_idx=None): + if policy_idx is None or not isinstance(policy_idx, int): + policy_idx = random.randint(0, len(self.policies) - 1) + else: + policy_idx = policy_idx % len(self.policies) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment CIFAR10 Policy" + + +class SVHNPolicy(object): + """ Randomly choose one of the best 25 Sub-policies on SVHN. + + Example: + >>> policy = SVHNPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> SVHNPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.9, "shearX", 4, 0.2, "invert", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.7, "invert", 5, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.6, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 3, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "equalize", 1, 0.9, "rotate", 3, fillcolor), + SubPolicy(0.9, "shearX", 4, 0.8, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.4, "invert", 5, fillcolor), + SubPolicy(0.9, "shearY", 5, 0.2, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 6, 0.8, "autocontrast", 1, fillcolor), + SubPolicy(0.6, "equalize", 3, 0.9, "rotate", 3, fillcolor), + SubPolicy(0.9, "shearX", 4, 0.3, "solarize", 3, fillcolor), + SubPolicy(0.8, "shearY", 8, 0.7, "invert", 4, fillcolor), + SubPolicy(0.9, "equalize", 5, 0.6, "translateY", 6, fillcolor), + SubPolicy(0.9, "invert", 4, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.3, "contrast", 3, 0.8, "rotate", 4, fillcolor), + SubPolicy(0.8, "invert", 5, 0.0, "translateY", 2, fillcolor), + 
 SubPolicy(0.7, "shearY", 6, 0.4, "solarize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 0.8, "rotate", 4, fillcolor), + SubPolicy( + 0.3, "shearY", 7, 0.9, "translateX", 3, fillcolor), SubPolicy( + 0.1, "shearX", 6, 0.6, "invert", 5, fillcolor), SubPolicy( + 0.7, "solarize", 2, 0.6, "translateY", 7, + fillcolor), SubPolicy(0.8, "shearY", 4, 0.8, "invert", + 8, fillcolor), SubPolicy( + 0.7, "shearX", 9, 0.8, + "translateY", 3, + fillcolor), SubPolicy( + 0.8, "shearY", 5, 0.7, + "autocontrast", 3, + fillcolor), + SubPolicy(0.7, "shearX", 2, 0.1, "invert", 5, fillcolor) + ] + + def __call__(self, img, policy_idx=None): + if policy_idx is None or not isinstance(policy_idx, int): + policy_idx = random.randint(0, len(self.policies) - 1) + else: + policy_idx = policy_idx % len(self.policies) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment SVHN Policy" + + +class SubPolicy(object): + def __init__(self, + p1, + operation1, + magnitude_idx1, + p2, + operation2, + magnitude_idx2, + fillcolor=(128, 128, 128)): + ranges = { + "shearX": np.linspace(0, 0.3, 10), + "shearY": np.linspace(0, 0.3, 10), + "translateX": np.linspace(0, 150 / 331, 10), + "translateY": np.linspace(0, 150 / 331, 10), + "rotate": np.linspace(0, 30, 10), + "color": np.linspace(0.0, 0.9, 10), + "posterize": np.round(np.linspace(8, 4, 10), 0).astype(int), + "solarize": np.linspace(256, 0, 10), + "contrast": np.linspace(0.0, 0.9, 10), + "sharpness": np.linspace(0.0, 0.9, 10), + "brightness": np.linspace(0.0, 0.9, 10), + "autocontrast": [0] * 10, + "equalize": [0] * 10, + "invert": [0] * 10 + } + + # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite(rot, + Image.new("RGBA", rot.size, (128, ) * 4), + rot).convert(img.mode) + + func = { + "shearX": lambda img, magnitude: img.transform( 
+ img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "shearY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "translateX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0), + fillcolor=fillcolor), + "translateY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])), + fillcolor=fillcolor), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img) + } + + self.p1 = p1 + self.operation1 = func[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + self.p2 = p2 + self.operation2 = func[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + def __call__(self, img): + if random.random() < self.p1: + img = self.operation1(img, self.magnitude1) + if random.random() < self.p2: + img = self.operation2(img, self.magnitude2) + return img diff 
--git a/ppcls/data/imaug/batch_operators.py b/ppcls/data/imaug/batch_operators.py new file mode 100644 index 000000000..aa18aedfa --- /dev/null +++ b/ppcls/data/imaug/batch_operators.py @@ -0,0 +1,115 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +from .fmix import sample_mask + + +class BatchOperator(object): + """ BatchOperator """ + + def __init__(self, *args, **kwargs): + pass + + def _unpack(self, batch): + """ _unpack """ + assert isinstance(batch, list), \ + 'batch should be a list filled with tuples (img, label)' + bs = len(batch) + assert bs > 0, 'size of the batch data should > 0' + imgs, labels = list(zip(*batch)) + return np.array(imgs), np.array(labels), bs + + def __call__(self, batch): + return batch + + +class MixupOperator(BatchOperator): + """ Mixup operator """ + + def __init__(self, alpha=0.2): + assert alpha > 0., \ + 'parameter alpha[%f] should > 0.0' % (alpha) + self._alpha = alpha + + def __call__(self, batch): + imgs, labels, bs = self._unpack(batch) + idx = np.random.permutation(bs) + lam = np.random.beta(self._alpha, self._alpha) + imgs = lam * imgs + (1 - lam) * imgs[idx] + return list(zip(imgs, labels, labels[idx], [lam] * bs)) + + +class CutmixOperator(BatchOperator): + """ Cutmix operator """ + 
+ + def __init__(self, alpha=0.2): + assert alpha > 0., \ + 'parameter alpha[%f] should > 0.0' % (alpha) + self._alpha = alpha + + def _rand_bbox(self, size, lam): + """ _rand_bbox """ + w = size[2] + h = size[3] + cut_rat = np.sqrt(1. - lam) + cut_w = int(w * cut_rat) + cut_h = int(h * cut_rat) + + # uniform + cx = np.random.randint(w) + cy = np.random.randint(h) + + bbx1 = np.clip(cx - cut_w // 2, 0, w) + bby1 = np.clip(cy - cut_h // 2, 0, h) + bbx2 = np.clip(cx + cut_w // 2, 0, w) + bby2 = np.clip(cy + cut_h // 2, 0, h) + + return bbx1, bby1, bbx2, bby2 + + def __call__(self, batch): + imgs, labels, bs = self._unpack(batch) + idx = np.random.permutation(bs) + lam = np.random.beta(self._alpha, self._alpha) + + bbx1, bby1, bbx2, bby2 = self._rand_bbox(imgs.shape, lam) + imgs[:, :, bbx1:bbx2, bby1:bby2] = imgs[idx, :, bbx1:bbx2, bby1:bby2] + lam = 1 - (float(bbx2 - bbx1) * (bby2 - bby1) / + (imgs.shape[-2] * imgs.shape[-1])) + return list(zip(imgs, labels, labels[idx], [lam] * bs)) + + +class FmixOperator(BatchOperator): + """ Fmix operator """ + + def __init__(self, alpha=1, decay_power=3, max_soft=0., reformulate=False): + self._alpha = alpha + self._decay_power = decay_power + self._max_soft = max_soft + self._reformulate = reformulate + + def __call__(self, batch): + imgs, labels, bs = self._unpack(batch) + idx = np.random.permutation(bs) + size = (imgs.shape[2], imgs.shape[3]) + lam, mask = sample_mask(self._alpha, self._decay_power, \ + size, self._max_soft, self._reformulate) + imgs = mask * imgs + (1 - mask) * imgs[idx] + return list(zip(imgs, labels, labels[idx], [lam] * bs)) diff --git a/ppcls/data/imaug/cutout.py b/ppcls/data/imaug/cutout.py new file mode 100644 index 000000000..1d80a36f2 --- /dev/null +++ b/ppcls/data/imaug/cutout.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import random + + +class Cutout(object): + def __init__(self, n_holes=1, length=112): + self.n_holes = n_holes + self.length = length + + def __call__(self, img): + """ cutout_image """ + h, w = img.shape[:2] + mask = np.ones((h, w), np.float32) + + for n in range(self.n_holes): + y = np.random.randint(h) + x = np.random.randint(w) + + y1 = np.clip(y - self.length // 2, 0, h) + y2 = np.clip(y + self.length // 2, 0, h) + x1 = np.clip(x - self.length // 2, 0, w) + x2 = np.clip(x + self.length // 2, 0, w) + + img[y1:y2, x1:x2] = 0 + return img diff --git a/ppcls/data/imaug/fmix.py b/ppcls/data/imaug/fmix.py new file mode 100644 index 000000000..fb9382115 --- /dev/null +++ b/ppcls/data/imaug/fmix.py @@ -0,0 +1,217 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def fftfreqnd(h, w=None, z=None):
    """ Get bin values for discrete fourier transform of size (h, w, z)

    :param h: Required, first dimension size
    :param w: Optional, second dimension size
    :param z: Optional, third dimension size
    """
    # Scalar 0 placeholders so the final sqrt broadcasts correctly even
    # when the corresponding dimension is absent.
    fz = fx = 0
    fy = np.fft.fftfreq(h)

    if w is not None:
        # Add a trailing axis so fy broadcasts against fx below.
        fy = np.expand_dims(fy, -1)

        # Keep only the non-redundant half of the frequencies along the
        # last axis, as produced by a real FFT.
        # NOTE(review): the odd branch keeps w//2 + 2 bins, one more than
        # np.fft.rfftfreq(w) would — confirm this off-by-one is intended.
        if w % 2 == 1:
            fx = np.fft.fftfreq(w)[:w // 2 + 2]
        else:
            fx = np.fft.fftfreq(w)[:w // 2 + 1]

    if z is not None:
        fy = np.expand_dims(fy, -1)
        # NOTE(review): both parity branches are identical; the test has no
        # effect. Preserved as-is to keep behavior unchanged.
        if z % 2 == 1:
            fz = np.fft.fftfreq(z)[:, None]
        else:
            fz = np.fft.fftfreq(z)[:, None]

    # Radial frequency magnitude; broadcasting builds the full grid.
    return np.sqrt(fx * fx + fy * fy + fz * fz)
/ max(w, h, z)])) + **decay_power) + + param_size = [ch] + list(freqs.shape) + [2] + param = np.random.randn(*param_size) + + scale = np.expand_dims(scale, -1)[None, :] + + return scale * param + + +def make_low_freq_image(decay, shape, ch=1): + """ Sample a low frequency image from fourier space + + :param decay_power: Decay power for frequency decay prop 1/f**d + :param shape: Shape of desired mask, list up to 3 dims + :param ch: Number of channels for desired mask + """ + freqs = fftfreqnd(*shape) + spectrum = get_spectrum(freqs, decay, ch, + *shape) #.reshape((1, *shape[:-1], -1)) + spectrum = spectrum[:, 0] + 1j * spectrum[:, 1] + mask = np.real(np.fft.irfftn(spectrum, shape)) + + if len(shape) == 1: + mask = mask[:1, :shape[0]] + if len(shape) == 2: + mask = mask[:1, :shape[0], :shape[1]] + if len(shape) == 3: + mask = mask[:1, :shape[0], :shape[1], :shape[2]] + + mask = mask + mask = (mask - mask.min()) + mask = mask / mask.max() + return mask + + +def sample_lam(alpha, reformulate=False): + """ Sample a lambda from symmetric beta distribution with given alpha + + :param alpha: Alpha value for beta distribution + :param reformulate: If True, uses the reformulation of [1]. + """ + if reformulate: + lam = beta.rvs(alpha + 1, alpha) + else: + lam = beta.rvs(alpha, alpha) + + return lam + + +def binarise_mask(mask, lam, in_shape, max_soft=0.0): + """ Binarises a given low frequency image such that it has mean lambda. + + :param mask: Low frequency image, usually the result of `make_low_freq_image` + :param lam: Mean value of final mask + :param in_shape: Shape of inputs + :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask. 
def sample_mask(alpha, decay_power, shape, max_soft=0.0, reformulate=False):
    """Sample an FMix mask whose mean equals a lambda drawn from Beta(alpha, alpha).

    :param alpha: Alpha value for beta distribution from which to sample mean of mask
    :param decay_power: Decay power for frequency decay prop 1/f**d
    :param shape: Shape of desired mask, int or list up to 3 dims
    :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
    :param reformulate: If True, uses the reformulation of [1].
    :return: (lambda as float, binarised mask array)
    """
    # Normalize a bare int into a 1-tuple shape.
    if isinstance(shape, int):
        shape = (shape, )

    lam = sample_lam(alpha, reformulate)
    low_freq = make_low_freq_image(decay_power, shape)
    hard_mask = binarise_mask(low_freq, lam, shape, max_soft)
    return float(lam), hard_mask
class FMixBase:
    """Abstract base for FMix augmentation.

    Args:
        decay_power (float): Decay power for frequency decay prop 1/f**d
        alpha (float): Alpha value for beta distribution from which to sample mean of mask
        size ([int] | [int, int] | [int, int, int]): Shape of desired mask, list up to 3 dims
        max_soft (float): Softening value between 0 and 0.5 which smooths hard edges in the mask.
        reformulate (bool): If True, uses the reformulation of [1].
    """

    def __init__(self,
                 decay_power=3,
                 alpha=1,
                 size=(32, 32),
                 max_soft=0.0,
                 reformulate=False):
        super().__init__()
        # Mask-sampling hyper-parameters.
        self.alpha = alpha
        self.decay_power = decay_power
        self.size = size
        self.max_soft = max_soft
        self.reformulate = reformulate
        # Populated by subclasses after each application.
        self.index = None
        self.lam = None

    def __call__(self, x):
        # Subclasses implement the actual mixing.
        raise NotImplementedError

    def loss(self, *args, **kwargs):
        # Subclasses implement the matching loss.
        raise NotImplementedError
class GridMask(object):
    """GridMask augmentation: zero out a regular, randomly shifted and
    rotated grid of stripes.

    The application probability ramps up linearly with the module-level
    epoch counter (CURR_EPOCH / NUM_EPOCHS) until it reaches `prob`.

    Args:
        d1, d2: lower/upper bound for the random grid period d.
        rotate: upper bound (degrees) for the random grid rotation.
        ratio: fraction of each grid period that is zeroed out.
        mode: 0 keeps the mask as-is, 1 inverts it.
        prob: upper-limit probability of applying the mask.
    """

    def __init__(self, d1, d2, rotate=1, ratio=0.5, mode=0, prob=1.):
        self.d1 = d1
        self.d2 = d2
        self.rotate = rotate
        self.ratio = ratio
        self.mode = mode
        self.st_prob = prob  # configured (maximum) probability
        self.prob = prob  # effective probability for the current epoch
        self.last_prob = -1  # last value logged, to avoid per-sample spam

    def set_prob(self):
        # Linear warm-up of the application probability over NUM_EPOCHS.
        global CURR_EPOCH
        global NUM_EPOCHS
        self.prob = self.st_prob * min(1, 1.0 * CURR_EPOCH / NUM_EPOCHS)

    def __call__(self, img):
        # NOTE(review): the shape unpack below expects a channel-first
        # (C, H, W) array — confirm against the transform pipeline order.
        self.set_prob()
        if abs(self.last_prob - self.prob) > 1e-10:
            global CURR_EPOCH
            global NUM_EPOCHS
            print(
                "self.prob is updated, self.prob={}, CURR_EPOCH: {}, NUM_EPOCHS: {}".
                format(self.prob, CURR_EPOCH, NUM_EPOCHS))
            self.last_prob = self.prob
        if np.random.rand() > self.prob:
            return img
        _, h, w = img.shape
        # Work on a 1.5x oversized mask so the later rotation does not
        # expose unmasked corners; the center crop below restores (h, w).
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(self.d1, self.d2)
        # Stripe width derived from the sampled period.
        # NOTE(review): stored on self, so it leaks between calls.
        self.l = int(d * self.ratio + 0.5)
        mask = np.ones((hh, ww), np.float32)
        # Random phase offsets for the horizontal / vertical stripes.
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        for i in range(-1, hh // d + 1):
            s = d * i + st_h
            t = s + self.l
            s = max(min(s, hh), 0)
            t = max(min(t, hh), 0)
            mask[s:t, :] *= 0
        for i in range(-1, ww // d + 1):
            s = d * i + st_w
            t = s + self.l
            s = max(min(s, ww), 0)
            t = max(min(t, ww), 0)
            mask[:, s:t] *= 0
        # Rotate the binary mask via PIL, then center-crop back to (h, w).
        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) //
                    2 + w]

        if self.mode == 1:
            mask = 1 - mask

        # Broadcast the (H, W) mask across channels.
        mask = np.expand_dims(mask, axis=0)
        img = (img * mask).astype(img.dtype)

        return img
class HideAndSeek(object):
    """Hide-and-Seek augmentation: partition the image into a grid and zero
    each cell independently with probability `hide_prob`. A grid size of 0
    (one of the random choices) leaves the image untouched.
    """

    def __init__(self):
        # possible grid size, 0 means no hiding
        self.grid_sizes = [0, 16, 32, 44, 56]
        # hiding probability
        self.hide_prob = 0.5

    def __call__(self, img):
        # randomly choose one grid size
        grid_size = np.random.choice(self.grid_sizes)

        # Expects a channel-first (C, H, W) array.
        _, h, w = img.shape

        # hide the patches
        if grid_size == 0:
            return img
        # NOTE(review): x ranges over the width but indexes axis 1 (height),
        # and y over the height indexes axis 2 (width); for non-square
        # images part of the image is never addressed (or the slice is
        # empty). Confirm whether this swap is intentional.
        for x in range(0, w, grid_size):
            for y in range(0, h, grid_size):
                x_end = min(w, x + grid_size)
                y_end = min(h, y + grid_size)
                if (random.random() <= self.hide_prob):
                    img[:, x:x_end, y:y_end] = 0

        return img
class DecodeImage(object):
    """Decode a raw (still-encoded) image byte string into an ndarray."""

    def __init__(self, to_rgb=True, to_np=False, channel_first=False):
        self.to_rgb = to_rgb
        self.to_np = to_np  #to numpy
        self.channel_first = channel_first  #only enabled when to_np is True

    def __call__(self, img):
        # The sample arriving here is the raw file content.
        expected_type = str if six.PY2 else bytes
        assert type(img) is expected_type and len(
            img) > 0, "invalid input 'img' in DecodeImage"
        buf = np.frombuffer(img, dtype='uint8')
        decoded = cv2.imdecode(buf, 1)  # 1 = force 3-channel BGR
        if self.to_rgb:
            assert decoded.shape[2] == 3, 'invalid shape of image[%s]' % (
                decoded.shape)
            decoded = decoded[:, :, ::-1]  # BGR -> RGB

        if self.channel_first:
            decoded = decoded.transpose((2, 0, 1))  # HWC -> CHW

        return decoded
class CropImage(object):
    """Center-crop an HWC image to a fixed size."""

    def __init__(self, size):
        # A bare int requests a square crop.
        self.size = (size, size) if type(size) is int else size

    def __call__(self, img):
        # NOTE(review): a tuple is unpacked as (w, h) here despite the
        # original "(h, w)" comment — confirm with callers.
        crop_w, crop_h = self.size
        img_h, img_w = img.shape[:2]

        left = (img_w - crop_w) // 2
        top = (img_h - crop_h) // 2
        return img[top:top + crop_h, left:left + crop_w, :]
class NormalizeImage(object):
    """Normalize an image: scale pixel values, subtract mean, divide by std."""

    def __init__(self, scale=None, mean=None, std=None, order='chw'):
        # YAML configs pass scale as an expression string such as "1./255.".
        # NOTE(review): eval() on a config value — acceptable only for
        # trusted config files.
        if isinstance(scale, str): scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        # ImageNet statistics are the defaults.
        mean = [0.485, 0.456, 0.406] if mean is None else mean
        std = [0.229, 0.224, 0.225] if std is None else std

        # Reshape so mean/std broadcast against either CHW or HWC images.
        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, img):
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)

        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        return (img.astype('float32') * self.scale - self.mean) / self.std
np.array(img) + + return img.transpose((2, 0, 1)) diff --git a/ppcls/data/imaug/randaugment.py b/ppcls/data/imaug/randaugment.py new file mode 100644 index 000000000..03805c608 --- /dev/null +++ b/ppcls/data/imaug/randaugment.py @@ -0,0 +1,87 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#This code is based on https://github.com/ + +from PIL import Image, ImageEnhance, ImageOps +import numpy as np +import random + + +class RandAugment(object): + def __init__(self, num_layers, magnitude, fillcolor=(128, 128, 128)): + self.num_layers = num_layers + self.magnitude = magnitude + self.max_level = 10 + + abso_level = self.magnitude / self.max_level + self.level_map = { + "shearX": 0.3 * abso_level, + "shearY": 0.3 * abso_level, + "translateX": 150.0 / 331 * abso_level, + "translateY": 150.0 / 331 * abso_level, + "rotate": 30 * abso_level, + "color": 0.9 * abso_level, + "posterize": int(4.0 * abso_level), + "solarize": 256.0 * abso_level, + "contrast": 0.9 * abso_level, + "sharpness": 0.9 * abso_level, + "brightness": 0.9 * abso_level, + "autocontrast": 0, + "equalize": 0, + "invert": 0 + } + + # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite(rot, + Image.new("RGBA", rot.size, (128, ) * 4), + rot).convert(img.mode) 
+ + self.func = { + "shearX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "shearY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "translateX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0), + fillcolor=fillcolor), + "translateY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])), + fillcolor=fillcolor), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img) + } + + def __call__(self, img): + avaiable_op_names = self.level_map.keys() + for layer_num in range(self.num_layers): + op_name = np.random.choice(avaiable_op_names) + img = self.func[op_name](img, self.level_map[op_name]) + return img diff --git a/ppcls/data/imaug/random_erasing.py b/ppcls/data/imaug/random_erasing.py new file mode 100644 index 
class RandomErasing(object):
    """Randomly erase one rectangular region of a CHW image (Zhong et al.).

    Args:
        EPSILON: probability of applying the erasure.
        sl, sh: lower/upper bound of the erased area as a fraction of the image.
        r1: lower bound of the aspect ratio (upper bound is 1/r1).
        mean: per-channel fill values for the erased region.
    """

    def __init__(self, EPSILON=0.5, sl=0.02, sh=0.4, r1=0.3,
                 mean=[0., 0., 0.]):
        self.EPSILON = EPSILON
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1

    def __call__(self, img):
        """Erase one region in-place; give up after 100 failed size attempts."""
        if random.uniform(0, 1) > self.EPSILON:
            return img

        for attempt in range(100):
            area = img.shape[1] * img.shape[2]

            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1 / self.r1)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < img.shape[2] and h < img.shape[1]:
                x1 = random.randint(0, img.shape[1] - h)
                y1 = random.randint(0, img.shape[2] - w)
                if img.shape[0] == 3:
                    img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
                    img[1, x1:x1 + h, y1:y1 + w] = self.mean[1]
                    img[2, x1:x1 + h, y1:y1 + w] = self.mean[2]
                else:
                    # BUG FIX: single-channel images previously filled with
                    # self.mean[1]; channel 0 must use mean[0].
                    img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
                return img
        return img
class SampleNumException(Exception):
    """Raised when the dataset holds fewer samples than one batch, so a
    drop-last reader would never yield anything.
    """

    def __init__(self, message='', sample_num=0, batch_size=1):
        # Typo fix in the user-facing message: "turnning" -> "turning".
        message += "\nError: The number of the whole data ({}) " \
            "is smaller than the batch_size ({}), and drop_last " \
            "is turning on, so nothing will feed in program, " \
            "Terminated now. Please reset batch_size to a smaller " \
            "number or feed more data!".format(sample_num, batch_size)
        super(SampleNumException, self).__init__(message)
def create_file_list(params):
    """
    if mode is test, create the file list

    Scans ``params['data_dir']`` and writes one "<name> 0" line per image
    file into a temporary list file, storing its path in
    ``params['file_list']``.

    Args:
        params(dict):
    """
    # BUG FIX: ``imghdr`` was used below without ever being imported,
    # raising NameError as soon as test mode ran. Imported lazily since
    # only test mode needs content sniffing.
    import imghdr

    data_dir = params.get('data_dir', '')
    params['file_list'] = ".tmp.txt"
    imgtype_list = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff'}
    with open(params['file_list'], "w") as fout:
        for file_name in os.listdir(data_dir):
            file_path = os.path.join(data_dir, file_name)
            # Sniff the actual content type; skip non-image files.
            if imghdr.what(file_path) not in imgtype_list:
                continue
            fout.write(file_name + " 0" + "\n")


def shuffle_lines(full_lines, seed=None):
    """
    random shuffle lines

    Args:
        full_lines(list):
        seed(int): random seed, set so every trainer produces the same order

    Returns:
        the same list, shuffled in place
    """
    if seed is not None:
        # A seeded, private generator keeps the order reproducible without
        # disturbing the global numpy RNG state.
        np.random.RandomState(seed).shuffle(full_lines)
    else:
        np.random.shuffle(full_lines)

    return full_lines
def partial_reader(params, full_lines, part_id=0, part_num=1):
    """
    create a reader with partial data

    Args:
        params(dict): needs 'batch_size', 'mode', 'transforms', 'data_dir'
        full_lines: label list, one "<path> <label>" entry per line
        part_id(int): part index of the current partial data
        part_num(int): part num of the dataset

    Raises:
        SampleNumException: if this shard holds fewer samples than one
            per-trainer batch (outside test mode).
    """
    assert part_id < part_num, ("part_num: {} should be larger " \
        "than part_id: {}".format(part_num, part_id))

    # Stride-partition the list so each worker gets a disjoint shard.
    full_lines = full_lines[part_id::part_num]

    batch_size = int(params['batch_size']) // trainers_num
    if params['mode'] != "test" and len(full_lines) < batch_size:
        raise SampleNumException('', len(full_lines), batch_size)

    def reader():
        ops = create_operators(params['transforms'])
        for line in full_lines:
            img_path, label = line.split()
            img_path = os.path.join(params['data_dir'], img_path)
            # BUG FIX: image files are binary — the original text-mode
            # ``open(img_path).read()`` breaks byte decoding under
            # Python 3 and never closed the handle.
            with open(img_path, 'rb') as f:
                img = f.read()
            img = transform(img, ops)
            yield (img, int(label))

    return reader
""" + Create a reader for trainning/validate/test + + Args: + config(dict): arguments + mode(str): train or val or test + seed(int): random seed used to generate same sequence in each trainer + + Returns: + the specific reader + """ + + def __init__(self, config, mode='train', seed=None): + try: + self.params = config[mode.upper()] + except KeyError: + raise ModeException(mode=mode) + + use_mix = config.get('use_mix') + self.params['mode'] = mode + if seed is not None: + self.params['shuffle_seed'] = seed + self.batch_ops = [] + if use_mix and mode == "train": + self.batch_ops = create_operators(self.params['mix']) + + def __call__(self): + reader = mp_reader(self.params) + + batch_size = int(self.params['batch_size']) // trainers_num + + def wrapper(): + batch = [] + for idx, sample in enumerate(reader()): + img, label = sample + batch.append((img, label)) + if (idx + 1) % batch_size == 0: + batch = transform(batch, self.batch_ops) + yield batch + batch = [] + + return wrapper + + +signal.signal(signal.SIGINT, term_mp) +signal.signal(signal.SIGTERM, term_mp) diff --git a/ppcls/modeling/__init__.py b/ppcls/modeling/__init__.py new file mode 100644 index 000000000..e5badd50a --- /dev/null +++ b/ppcls/modeling/__init__.py @@ -0,0 +1,20 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from . import architectures +from . 
import loss + +from .architectures import * +from .loss import * +from .utils import similar_architectures diff --git a/ppcls/modeling/architectures/__init__.py b/ppcls/modeling/architectures/__init__.py new file mode 100644 index 000000000..f1ff7d2eb --- /dev/null +++ b/ppcls/modeling/architectures/__init__.py @@ -0,0 +1,44 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from .alexnet import AlexNet +from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x1_0, MobileNetV1_x0_75, MobileNetV1 +from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2 +from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25 +from .googlenet import GoogLeNet +from .vgg import VGG11, VGG13, VGG16, VGG19 +from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 +from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc +from .resnet_vd import ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd +from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d, ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d +from .resnext_vd import ResNeXt50_vd_64x4d, 
ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d, ResNeXt50_vd_32x4d, ResNeXt101_vd_32x4d, ResNeXt152_vd_32x4d +from .inception_v4 import InceptionV4 +from .se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd, SE_ResNet101_vd, SE_ResNet152_vd, SE_ResNet200_vd +from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d +from .se_resnext_vd import SE_ResNeXt50_vd_32x4d, SE_ResNeXt101_vd_32x4d, SENet154_vd +from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 +from .shufflenet_v2_swish import ShuffleNetV2_swish, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish +from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2 +from .xception import Xception41, Xception65, Xception71 +from .xception_deeplab import Xception41_deeplab, Xception65_deeplab, Xception71_deeplab +from .densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264 +from .squeezenet import SqueezeNet1_0, SqueezeNet1_1 +from .darknet import DarkNet53 +from .resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl +from .efficientnet import EfficientNet, EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7 +from .res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2Net50_26w_6s, Res2Net50_26w_8s, Res2Net101_26w_4s, Res2Net152_26w_4s +from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s +from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, 
SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C +from .darts_gs import DARTS_GS_6M, DARTS_GS_4M +from .resnet_acnet import ResNet18_ACNet, ResNet34_ACNet, ResNet50_ACNet, ResNet101_ACNet, ResNet152_ACNet diff --git a/ppcls/modeling/architectures/alexnet.py b/ppcls/modeling/architectures/alexnet.py new file mode 100644 index 000000000..36f7e8678 --- /dev/null +++ b/ppcls/modeling/architectures/alexnet.py @@ -0,0 +1,172 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid + +__all__ = ['AlexNet'] + + +class AlexNet(): + def __init__(self): + pass + + def net(self, input, class_dim=1000): + stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11) + layer_name = [ + "conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8" + ] + conv1 = fluid.layers.conv2d( + input=input, + num_filters=64, + filter_size=11, + stride=4, + padding=2, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[0] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[0] + "_weights")) + pool1 = fluid.layers.pool2d( + input=conv1, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5) + conv2 = fluid.layers.conv2d( + input=pool1, + num_filters=192, + filter_size=5, + stride=1, + padding=2, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[1] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[1] + "_weights")) + pool2 = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3) + conv3 = fluid.layers.conv2d( + input=pool2, + num_filters=384, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[2] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[2] + "_weights")) + + stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3) + 
conv4 = fluid.layers.conv2d( + input=conv3, + num_filters=256, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[3] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[3] + "_weights")) + + stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3) + conv5 = fluid.layers.conv2d( + input=conv4, + num_filters=256, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[4] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[4] + "_weights")) + pool5 = fluid.layers.pool2d( + input=conv5, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5) + stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] * + drop6.shape[3] * 1.0) + + fc6 = fluid.layers.fc( + input=drop6, + size=4096, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[5] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[5] + "_weights")) + + drop7 = fluid.layers.dropout(x=fc6, dropout_prob=0.5) + stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0) + + fc7 = fluid.layers.fc( + input=drop7, + size=4096, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[6] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[6] + "_weights")) + + stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0) + out = fluid.layers.fc( + input=fc7, + size=class_dim, + bias_attr=fluid.param_attr.ParamAttr( 
+ initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[7] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[7] + "_weights")) + return out diff --git a/ppcls/modeling/architectures/darknet.py b/ppcls/modeling/architectures/darknet.py new file mode 100644 index 000000000..b091e6ffa --- /dev/null +++ b/ppcls/modeling/architectures/darknet.py @@ -0,0 +1,120 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import math
__all__ = ["DarkNet53"]


class DarkNet53():
    """DarkNet-53 backbone (the YOLOv3 feature extractor) adapted for
    classification: 5 residual stages followed by global avg-pool + fc."""

    def __init__(self):

        pass

    def net(self, input, class_dim=1000):
        """Build the network; returns raw fc logits of shape [N, class_dim]."""
        # stage depths for the 53-layer variant; basicblock is the 1x1->3x3
        # residual unit
        DarkNet_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)}
        stages, block_func = DarkNet_cfg[53]
        stages = stages[0:5]
        conv1 = self.conv_bn_layer(
            input,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            name="yolo_input")
        conv = self.downsample(
            conv1, ch_out=conv1.shape[1] * 2, name="yolo_input.downsample")

        for i, stage in enumerate(stages):
            # channel width doubles each stage: 32, 64, 128, 256, 512
            conv = self.layer_warp(
                block_func,
                conv,
                32 * (2**i),
                stage,
                name="stage.{}".format(i))
            if i < len(stages) - 1:  # do not downsample in the last stage
                conv = self.downsample(
                    conv,
                    ch_out=conv.shape[1] * 2,
                    name="stage.{}.downsample".format(i))
        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True)
        # fan-in uniform init for the classifier head
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc_weights'),
            bias_attr=ParamAttr(name='fc_offset'))
        return out

    def conv_bn_layer(self,
                      input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      name=None):
        """conv (no bias) + batch_norm with ReLU; `name` seeds the
        parameter names so pretrained weights map deterministically."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            param_attr=ParamAttr(name=name + ".conv.weights"),
            bias_attr=False)

        bn_name = name + ".bn"
        out = fluid.layers.batch_norm(
            input=conv,
            act='relu',
            param_attr=ParamAttr(name=bn_name + '.scale'),
            bias_attr=ParamAttr(name=bn_name + '.offset'),
            moving_mean_name=bn_name + '.mean',
            moving_variance_name=bn_name + '.var')
        return out

    def downsample(self,
                   input,
                   ch_out,
                   filter_size=3,
                   stride=2,
                   padding=1,
                   name=None):
        """Halve spatial resolution with a stride-2 3x3 conv_bn_layer."""
        return self.conv_bn_layer(
            input,
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            name=name)

    def basicblock(self, input, ch_out, name=None):
        """Residual unit: 1x1 (ch_out) -> 3x3 (2*ch_out) + identity add."""
        conv1 = self.conv_bn_layer(input, ch_out, 1, 1, 0, name=name + ".0")
        conv2 = self.conv_bn_layer(
            conv1, ch_out * 2, 3, 1, 1, name=name + ".1")
        out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
        return out

    def layer_warp(self, block_func, input, ch_out, count, name=None):
        """Stack `count` residual blocks of width ch_out."""
        res_out = block_func(input, ch_out, name='{}.0'.format(name))
        for j in range(1, count):
            res_out = block_func(res_out, ch_out, name='{}.{}'.format(name, j))
        return res_out
diff --git a/ppcls/modeling/architectures/darts_gs.py b/ppcls/modeling/architectures/darts_gs.py
new file mode 100644
index 000000000..ff5c9655d
--- /dev/null
+++ b/ppcls/modeling/architectures/darts_gs.py
@@ -0,0 +1,543 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
+# Licensed under the Apache License, Version 2.0; +# -------------------------------------------------------- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import sys +import numpy as np +import time +import functools +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Xavier +from paddle.fluid.initializer import Normal +from paddle.fluid.initializer import Constant + +from collections import namedtuple +Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat') + +arch_dict = { + 'DARTS_GS_6M': Genotype( + normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 1), + ('sep_conv_5x5', 0), ('sep_conv_3x3', 2), ('sep_conv_3x3', 1), + ('skip_connect', 4), ('sep_conv_3x3', 3)], + normal_concat=range(2, 6), + reduce=[('sep_conv_5x5', 0), ('max_pool_3x3', 1), ('dil_conv_5x5', 2), + ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), + ('dil_conv_3x3', 1), ('sep_conv_3x3', 2)], + reduce_concat=range(2, 6)), + 'DARTS_GS_4M': Genotype( + normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), + ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), + ('skip_connect', 0), ('dil_conv_3x3', 1)], + normal_concat=range(2, 6), + reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), + ('avg_pool_3x3', 1), ('skip_connect', 3), ('skip_connect', 2), + ('sep_conv_3x3', 0), ('sep_conv_5x5', 2)], + reduce_concat=range(2, 6)), +} + +__all__ = list(arch_dict.keys()) + +OPS = { + 'none' : lambda input, C, stride, name, affine: Zero(input, stride, name), + 'avg_pool_3x3' : lambda input, C, stride, name, affine: fluid.layers.pool2d(input, 3, 'avg', pool_stride=stride, pool_padding=1, name=name), + 'max_pool_3x3' : lambda input, C, stride, name, affine: fluid.layers.pool2d(input, 3, 'max', pool_stride=stride, pool_padding=1, name=name), + 'skip_connect' : 
lambda input,C, stride, name, affine: Identity(input, name) if stride == 1 else FactorizedReduce(input, C, name=name, affine=affine), + 'sep_conv_3x3' : lambda input,C, stride, name, affine: SepConv(input, C, C, 3, stride, 1, name=name, affine=affine), + 'sep_conv_5x5' : lambda input,C, stride, name, affine: SepConv(input, C, C, 5, stride, 2, name=name, affine=affine), + 'sep_conv_7x7' : lambda input,C, stride, name, affine: SepConv(input, C, C, 7, stride, 3, name=name, affine=affine), + 'dil_conv_3x3' : lambda input,C, stride, name, affine: DilConv(input, C, C, 3, stride, 2, 2, name=name, affine=affine), + 'dil_conv_5x5' : lambda input,C, stride, name, affine: DilConv(input, C, C, 5, stride, 4, 2, name=name, affine=affine), + 'conv_7x1_1x7' : lambda input,C, stride, name, affine: SevenConv(input, C, name=name, affine=affine) +} + + +def ReLUConvBN(input, + C_out, + kernel_size, + stride, + padding, + name='', + affine=True): + relu_a = fluid.layers.relu(input) + conv2d_a = fluid.layers.conv2d( + relu_a, C_out, kernel_size, stride, padding, bias_attr=False) + if affine: + reluconvbn_out = fluid.layers.batch_norm( + conv2d_a, + param_attr=ParamAttr( + initializer=Constant(1.), name=name + 'op.2.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=name + 'op.2.bias'), + moving_mean_name=name + 'op.2.running_mean', + moving_variance_name=name + 'op.2.running_var') + else: + reluconvbn_out = fluid.layers.batch_norm( + conv2d_a, + param_attr=ParamAttr( + initializer=Constant(1.), + learning_rate=0., + name=name + 'op.2.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + learning_rate=0., + name=name + 'op.2.bias'), + moving_mean_name=name + 'op.2.running_mean', + moving_variance_name=name + 'op.2.running_var') + return reluconvbn_out + + +def DilConv(input, + C_in, + C_out, + kernel_size, + stride, + padding, + dilation, + name='', + affine=True): + relu_a = fluid.layers.relu(input) + conv2d_a = fluid.layers.conv2d( + relu_a, + C_in, + 
kernel_size, + stride, + padding, + dilation, + groups=C_in, + bias_attr=False, + use_cudnn=False) + conv2d_b = fluid.layers.conv2d(conv2d_a, C_out, 1, bias_attr=False) + if affine: + dilconv_out = fluid.layers.batch_norm( + conv2d_b, + param_attr=ParamAttr( + initializer=Constant(1.), name=name + 'op.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=name + 'op.3.bias'), + moving_mean_name=name + 'op.3.running_mean', + moving_variance_name=name + 'op.3.running_var') + else: + dilconv_out = fluid.layers.batch_norm( + conv2d_b, + param_attr=ParamAttr( + initializer=Constant(1.), + learning_rate=0., + name=name + 'op.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + learning_rate=0., + name=name + 'op.3.bias'), + moving_mean_name=name + 'op.3.running_mean', + moving_variance_name=name + 'op.3.running_var') + return dilconv_out + + +def SepConv(input, + C_in, + C_out, + kernel_size, + stride, + padding, + name='', + affine=True): + relu_a = fluid.layers.relu(input) + conv2d_a = fluid.layers.conv2d( + relu_a, + C_in, + kernel_size, + stride, + padding, + groups=C_in, + bias_attr=False, + use_cudnn=False) + conv2d_b = fluid.layers.conv2d(conv2d_a, C_in, 1, bias_attr=False) + if affine: + bn_a = fluid.layers.batch_norm( + conv2d_b, + param_attr=ParamAttr( + initializer=Constant(1.), name=name + 'op.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=name + 'op.3.bias'), + moving_mean_name=name + 'op.3.running_mean', + moving_variance_name=name + 'op.3.running_var') + else: + bn_a = fluid.layers.batch_norm( + conv2d_b, + param_attr=ParamAttr( + initializer=Constant(1.), + learning_rate=0., + name=name + 'op.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + learning_rate=0., + name=name + 'op.3.bias'), + moving_mean_name=name + 'op.3.running_mean', + moving_variance_name=name + 'op.3.running_var') + + relu_b = fluid.layers.relu(bn_a) + conv2d_d = fluid.layers.conv2d( + relu_b, + C_in, + kernel_size, + 1, + 
padding, + groups=C_in, + bias_attr=False, + use_cudnn=False) + conv2d_e = fluid.layers.conv2d(conv2d_d, C_out, 1, bias_attr=False) + if affine: + sepconv_out = fluid.layers.batch_norm( + conv2d_e, + param_attr=ParamAttr( + initializer=Constant(1.), name=name + 'op.7.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=name + 'op.7.bias'), + moving_mean_name=name + 'op.7.running_mean', + moving_variance_name=name + 'op.7.running_var') + else: + sepconv_out = fluid.layers.batch_norm( + conv2d_e, + param_attr=ParamAttr( + initializer=Constant(1.), + learning_rate=0., + name=name + 'op.7.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + learning_rate=0., + name=name + 'op.7.bias'), + moving_mean_name=name + 'op.7.running_mean', + moving_variance_name=name + 'op.7.running_var') + return sepconv_out + + +def SevenConv(input, C_out, stride, name='', affine=True): + relu_a = fluid.layers.relu(input) + conv2d_a = fluid.layers.conv2d( + relu_a, + C_out, (1, 7), (1, stride), (0, 3), + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=name + 'op.1.weight'), + bias_attr=False) + conv2d_b = fluid.layers.conv2d( + conv2d_a, + C_out, (7, 1), (stride, 1), (3, 0), + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=name + 'op.2.weight'), + bias_attr=False) + if affine: + out = fluid.layers.batch_norm( + conv2d_b, + param_attr=ParamAttr( + initializer=Constant(1.), name=name + 'op.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=name + 'op.3.bias'), + moving_mean_name=name + 'op.3.running_mean', + moving_variance_name=name + 'op.3.running_var') + else: + out = fluid.layers.batch_norm( + conv2d_b, + param_attr=ParamAttr( + initializer=Constant(1.), + learning_rate=0., + name=name + 'op.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + learning_rate=0., + name=name + 'op.3.bias'), + moving_mean_name=name + 'op.3.running_mean', + moving_variance_name=name + 
'op.3.running_var') + + +def Identity(input, name=''): + return input + + +def Zero(input, stride, name=''): + ones = np.ones(input.shape[-2:]) + ones[::stride, ::stride] = 0 + ones = fluid.layers.assign(ones) + return input * ones + + +def FactorizedReduce(input, C_out, name='', affine=True): + relu_a = fluid.layers.relu(input) + conv2d_a = fluid.layers.conv2d( + relu_a, + C_out // 2, + 1, + 2, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=name + 'conv_1.weight'), + bias_attr=False) + h_end = relu_a.shape[2] + w_end = relu_a.shape[3] + slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end]) + conv2d_b = fluid.layers.conv2d( + slice_a, + C_out // 2, + 1, + 2, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=name + 'conv_2.weight'), + bias_attr=False) + out = fluid.layers.concat([conv2d_a, conv2d_b], axis=1) + if affine: + out = fluid.layers.batch_norm( + out, + param_attr=ParamAttr( + initializer=Constant(1.), name=name + 'bn.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=name + 'bn.bias'), + moving_mean_name=name + 'bn.running_mean', + moving_variance_name=name + 'bn.running_var') + else: + out = fluid.layers.batch_norm( + out, + param_attr=ParamAttr( + initializer=Constant(1.), + learning_rate=0., + name=name + 'bn.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + learning_rate=0., + name=name + 'bn.bias'), + moving_mean_name=name + 'bn.running_mean', + moving_variance_name=name + 'bn.running_var') + return out + + +class Cell(): + def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, + reduction_prev): + + if reduction_prev: + self.preprocess0 = functools.partial(FactorizedReduce, C_out=C) + else: + self.preprocess0 = functools.partial( + ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0) + self.preprocess1 = functools.partial( + ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0) + if reduction: + op_names, indices = 
zip(*genotype.reduce) + concat = genotype.reduce_concat + else: + op_names, indices = zip(*genotype.normal) + concat = genotype.normal_concat + print(op_names, indices, concat, reduction) + self._compile(C, op_names, indices, concat, reduction) + + def _compile(self, C, op_names, indices, concat, reduction): + assert len(op_names) == len(indices) + self._steps = len(op_names) // 2 + self._concat = concat + self.multiplier = len(concat) + + self._ops = [] + for name, index in zip(op_names, indices): + stride = 2 if reduction and index < 2 else 1 + op = functools.partial(OPS[name], C=C, stride=stride, affine=True) + self._ops += [op] + self._indices = indices + + def forward(self, s0, s1, drop_prob, is_train, name): + self.training = is_train + preprocess0_name = name + 'preprocess0.' + preprocess1_name = name + 'preprocess1.' + s0 = self.preprocess0(s0, name=preprocess0_name) + s1 = self.preprocess1(s1, name=preprocess1_name) + out = [s0, s1] + for i in range(self._steps): + h1 = out[self._indices[2 * i]] + h2 = out[self._indices[2 * i + 1]] + op1 = self._ops[2 * i] + op2 = self._ops[2 * i + 1] + h3 = op1(h1, name=name + '_ops.' + str(2 * i) + '.') + h4 = op2(h2, name=name + '_ops.' 
+ str(2 * i + 1) + '.') + if self.training and drop_prob > 0.: + if h3 != h1: + h3 = fluid.layers.dropout( + h3, + drop_prob, + dropout_implementation='upscale_in_train') + if h4 != h2: + h4 = fluid.layers.dropout( + h4, + drop_prob, + dropout_implementation='upscale_in_train') + s = h3 + h4 + out += [s] + return fluid.layers.concat([out[i] for i in self._concat], axis=1) + + +def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): + relu_a = fluid.layers.relu(input) + pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 2) + conv2d_a = fluid.layers.conv2d( + pool_a, 128, 1, name=aux_name + '.features.2', bias_attr=False) + bn_a_name = aux_name + '.features.3' + bn_a = fluid.layers.batch_norm( + conv2d_a, + act='relu', + name=bn_a_name, + param_attr=ParamAttr( + initializer=Constant(1.), name=bn_a_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=bn_a_name + '.bias'), + moving_mean_name=bn_a_name + '.running_mean', + moving_variance_name=bn_a_name + '.running_var') + conv2d_b = fluid.layers.conv2d( + bn_a, 768, 2, name=aux_name + '.features.5', bias_attr=False) + bn_b_name = aux_name + '.features.6' + bn_b = fluid.layers.batch_norm( + conv2d_b, + act='relu', + name=bn_b_name, + param_attr=ParamAttr( + initializer=Constant(1.), name=bn_b_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=bn_b_name + '.bias'), + moving_mean_name=bn_b_name + '.running_mean', + moving_variance_name=bn_b_name + '.running_var') + pool_b = fluid.layers.adaptive_pool2d(bn_b, (1, 1), "avg") + fc_name = aux_name + '.classifier' + fc = fluid.layers.fc(pool_b, + num_classes, + name=fc_name, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name=fc_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=fc_name + '.bias')) + return fc + + +def StemConv0(input, C_out): + conv_a = fluid.layers.conv2d( + input, C_out // 2, 3, stride=2, padding=1, bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + 
act='relu', + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.1.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.1.bias'), + moving_mean_name='stem0.1.running_mean', + moving_variance_name='stem0.1.running_var') + + conv_b = fluid.layers.conv2d( + bn_a, C_out, 3, stride=2, padding=1, bias_attr=False) + bn_b = fluid.layers.batch_norm( + conv_b, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.3.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.3.bias'), + moving_mean_name='stem0.3.running_mean', + moving_variance_name='stem0.3.running_var') + return bn_b + + +def StemConv1(input, C_out): + relu_a = fluid.layers.relu(input) + conv_a = fluid.layers.conv2d( + relu_a, C_out, 3, stride=2, padding=1, bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem1.1.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem1.1.bias'), + moving_mean_name='stem1.1.running_mean', + moving_variance_name='stem1.1.running_var') + return bn_a + + +class NetworkImageNet(object): + def __init__(self, arch='DARTS_6M'): + self.class_num = 1000 + self.init_channel = 48 + self._layers = 14 + self._auxiliary = False + self.drop_path_prob = 0 + genotype = arch_dict[arch] + + C = self.init_channel + layers = self._layers + C_prev_prev, C_prev, C_curr = C, C, C + self.cells = [] + reduction_prev = True + for i in range(layers): + if i in [layers // 3, 2 * layers // 3]: + C_curr *= 2 + reduction = True + else: + reduction = False + cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, + reduction_prev) + reduction_prev = reduction + self.cells += [cell] + C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr + if i == 2 * layers // 3: + C_to_auxiliary = C_prev + + def net(self, input, class_dim=1000, is_train=True): + self.logits_aux = None + num_channel = self.init_channel + s0 = StemConv0(input, num_channel) + s1 = StemConv1(s0, 
num_channel) + for i, cell in enumerate(self.cells): + name = 'cells.' + str(i) + '.' + s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and is_train: + self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num) + out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-4), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), + name='classifier.bias')) + return self.logits + + +def DARTS_GS_6M(): + return NetworkImageNet(arch='DARTS_GS_6M') + + +def DARTS_GS_4M(): + return NetworkImageNet(arch='DARTS_GS_4M') diff --git a/ppcls/modeling/architectures/densenet.py b/ppcls/modeling/architectures/densenet.py new file mode 100644 index 000000000..e8ba3818f --- /dev/null +++ b/ppcls/modeling/architectures/densenet.py @@ -0,0 +1,204 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
class DenseNet():
    """Densely Connected Convolutional Network (arXiv:1608.06993).

    Args:
        layers (int): network depth; one of 121, 161, 169, 201, 264.
    """

    def __init__(self, layers=121):
        self.layers = layers

    def net(self, input, bn_size=4, dropout=0, class_dim=1000):
        """Build the DenseNet graph and return the fc classifier output."""
        layers = self.layers
        supported_layers = [121, 161, 169, 201, 264]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)
        # depth -> (stem width, growth rate, layers per dense block)
        densenet_spec = {
            121: (64, 32, [6, 12, 24, 16]),
            161: (96, 48, [6, 12, 36, 24]),
            169: (64, 32, [6, 12, 32, 32]),
            201: (64, 32, [6, 12, 48, 32]),
            264: (64, 32, [6, 12, 64, 48]),
        }
        num_init_features, growth_rate, block_config = densenet_spec[layers]

        # Stem: 7x7 conv / stride 2 -> BN+ReLU -> 3x3 max pool / stride 2.
        x = fluid.layers.conv2d(
            input=input,
            num_filters=num_init_features,
            filter_size=7,
            stride=2,
            padding=3,
            act=None,
            param_attr=ParamAttr(name="conv1_weights"),
            bias_attr=False)
        x = fluid.layers.batch_norm(
            input=x,
            act='relu',
            param_attr=ParamAttr(name='conv1_bn_scale'),
            bias_attr=ParamAttr(name='conv1_bn_offset'),
            moving_mean_name='conv1_bn_mean',
            moving_variance_name='conv1_bn_variance')
        x = fluid.layers.pool2d(
            input=x,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        # Alternate dense blocks with channel-halving transitions.
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            x = self.make_dense_block(
                x,
                num_layers,
                bn_size,
                growth_rate,
                dropout,
                name='conv' + str(i + 2))
            num_features += num_layers * growth_rate
            if i != len(block_config) - 1:
                x = self.make_transition(
                    x, num_features // 2, name='conv' + str(i + 2) + '_blk')
                num_features //= 2

        # Final BN+ReLU, global average pool, and the fc classifier.
        x = fluid.layers.batch_norm(
            input=x,
            act='relu',
            param_attr=ParamAttr(name='conv5_blk_bn_scale'),
            bias_attr=ParamAttr(name='conv5_blk_bn_offset'),
            moving_mean_name='conv5_blk_bn_mean',
            moving_variance_name='conv5_blk_bn_variance')
        x = fluid.layers.pool2d(
            input=x, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(x.shape[1] * 1.0)
        return fluid.layers.fc(
            input=x,
            size=class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name="fc_weights"),
            bias_attr=ParamAttr(name='fc_offset'))

    def make_transition(self, input, num_output_features, name=None):
        """Transition layer: BN+ReLU -> 1x1 conv -> 2x2 average pool."""
        bn_ac = fluid.layers.batch_norm(
            input,
            act='relu',
            param_attr=ParamAttr(name=name + '_bn_scale'),
            bias_attr=ParamAttr(name + '_bn_offset'),
            moving_mean_name=name + '_bn_mean',
            moving_variance_name=name + '_bn_variance')
        reduced = fluid.layers.conv2d(
            input=bn_ac,
            num_filters=num_output_features,
            filter_size=1,
            stride=1,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_weights"))
        return fluid.layers.pool2d(
            input=reduced, pool_size=2, pool_stride=2, pool_type='avg')

    def make_dense_block(self,
                         input,
                         num_layers,
                         bn_size,
                         growth_rate,
                         dropout,
                         name=None):
        """Stack ``num_layers`` dense layers; each appends growth_rate maps."""
        x = input
        for idx in range(num_layers):
            x = self.make_dense_layer(
                x,
                growth_rate,
                bn_size,
                dropout,
                name=name + '_' + str(idx + 1))
        return x

    def make_dense_layer(self, input, growth_rate, bn_size, dropout,
                         name=None):
        """Bottleneck layer: BN-ReLU-1x1conv -> BN-ReLU-3x3conv, optional
        dropout, then concat the new features with the layer input."""
        bottleneck = fluid.layers.batch_norm(
            input,
            act='relu',
            param_attr=ParamAttr(name=name + '_x1_bn_scale'),
            bias_attr=ParamAttr(name + '_x1_bn_offset'),
            moving_mean_name=name + '_x1_bn_mean',
            moving_variance_name=name + '_x1_bn_variance')
        bottleneck = fluid.layers.conv2d(
            input=bottleneck,
            num_filters=bn_size * growth_rate,
            filter_size=1,
            stride=1,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_x1_weights"))
        new_features = fluid.layers.batch_norm(
            bottleneck,
            act='relu',
            param_attr=ParamAttr(name=name + '_x2_bn_scale'),
            bias_attr=ParamAttr(name + '_x2_bn_offset'),
            moving_mean_name=name + '_x2_bn_mean',
            moving_variance_name=name + '_x2_bn_variance')
        new_features = fluid.layers.conv2d(
            input=new_features,
            num_filters=growth_rate,
            filter_size=3,
            stride=1,
            padding=1,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_x2_weights"))
        if dropout:
            new_features = fluid.layers.dropout(
                x=new_features, dropout_prob=dropout)
        return fluid.layers.concat([input, new_features], axis=1)


def DenseNet121():
    """DenseNet with 121 layers."""
    return DenseNet(layers=121)


def DenseNet161():
    """DenseNet with 161 layers."""
    return DenseNet(layers=161)


def DenseNet169():
    """DenseNet with 169 layers."""
    return DenseNet(layers=169)


def DenseNet201():
    """DenseNet with 201 layers."""
    return DenseNet(layers=201)


def DenseNet264():
    """DenseNet with 264 layers."""
    return DenseNet(layers=264)
class DPN(object):
    """Dual Path Network (arXiv:1707.01629).

    Args:
        layers (int): depth variant; one of 68, 92, 98, 107, 131.
    """

    def __init__(self, layers=68):
        self.layers = layers

    def net(self, input, class_dim=1000):
        """Build the DPN forward graph and return the fc logits."""
        # get network args for this depth
        args = self.get_net_args(self.layers)
        bws = args['bw']
        inc_sec = args['inc_sec']
        rs = args['r']
        k_r = args['k_r']
        k_sec = args['k_sec']
        G = args['G']
        init_num_filter = args['init_num_filter']
        init_filter_size = args['init_filter_size']
        init_padding = args['init_padding']

        # conv1: stem convolution, BN+ReLU, then 3x3 max pool
        conv1_x_1 = fluid.layers.conv2d(
            input=input,
            num_filters=init_num_filter,
            filter_size=init_filter_size,
            stride=2,
            padding=init_padding,
            groups=1,
            act=None,
            bias_attr=False,
            name="conv1",
            param_attr=ParamAttr(name="conv1_weights"), )

        conv1_x_1 = fluid.layers.batch_norm(
            input=conv1_x_1,
            act='relu',
            is_test=False,
            name="conv1_bn",
            param_attr=ParamAttr(name='conv1_bn_scale'),
            bias_attr=ParamAttr('conv1_bn_offset'),
            moving_mean_name='conv1_bn_mean',
            moving_variance_name='conv1_bn_variance', )

        convX_x_x = fluid.layers.pool2d(
            input=conv1_x_1,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max',
            name="pool1")

        # conv2 - conv5: four stages of dual-path units. The first unit
        # of each stage ('proj' / 'down') carries a projection shortcut.
        match_list, num = [], 0
        for gc in range(4):
            bw = bws[gc]
            inc = inc_sec[gc]
            R = (k_r * bw) // rs[gc]
            if gc == 0:
                _type1 = 'proj'
                _type2 = 'normal'
                match = 1
            else:
                _type1 = 'down'
                _type2 = 'normal'
                match = match + k_sec[gc - 1]
            match_list.append(match)

            convX_x_x = self.dual_path_factory(
                convX_x_x, R, R, bw, inc, G, _type1, name="dpn" + str(match))
            for i_ly in range(2, k_sec[gc] + 1):
                num += 1
                if num in match_list:
                    num += 1
                convX_x_x = self.dual_path_factory(
                    convX_x_x, R, R, bw, inc, G, _type2,
                    name="dpn" + str(num))

        # Merge the residual and dense paths, then BN+ReLU.
        conv5_x_x = fluid.layers.concat(convX_x_x, axis=1)
        conv5_x_x = fluid.layers.batch_norm(
            input=conv5_x_x,
            act='relu',
            is_test=False,
            name="final_concat_bn",
            param_attr=ParamAttr(name='final_concat_bn_scale'),
            bias_attr=ParamAttr('final_concat_bn_offset'),
            moving_mean_name='final_concat_bn_mean',
            moving_variance_name='final_concat_bn_variance', )
        pool5 = fluid.layers.pool2d(
            input=conv5_x_x,
            pool_size=7,
            pool_stride=1,
            pool_padding=0,
            pool_type='avg', )

        stdv = 0.01
        fc6 = fluid.layers.fc(
            input=pool5,
            size=class_dim,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc_weights'),
            bias_attr=ParamAttr(name='fc_offset'))

        return fc6

    def get_net_args(self, layers):
        """Return the per-depth hyper-parameters as a dict.

        Raises:
            NotImplementedError: for an unsupported ``layers`` value.
        """
        if layers == 68:
            k_r = 128
            G = 32
            k_sec = [3, 4, 12, 3]
            inc_sec = [16, 32, 32, 64]
            bw = [64, 128, 256, 512]
            r = [64, 64, 64, 64]
            init_num_filter = 10
            init_filter_size = 3
            init_padding = 1
        elif layers == 92:
            k_r = 96
            G = 32
            k_sec = [3, 4, 20, 3]
            inc_sec = [16, 32, 24, 128]
            bw = [256, 512, 1024, 2048]
            r = [256, 256, 256, 256]
            init_num_filter = 64
            init_filter_size = 7
            init_padding = 3
        elif layers == 98:
            k_r = 160
            G = 40
            k_sec = [3, 6, 20, 3]
            inc_sec = [16, 32, 32, 128]
            bw = [256, 512, 1024, 2048]
            r = [256, 256, 256, 256]
            init_num_filter = 96
            init_filter_size = 7
            init_padding = 3
        elif layers == 107:
            k_r = 200
            G = 50
            k_sec = [4, 8, 20, 3]
            inc_sec = [20, 64, 64, 128]
            bw = [256, 512, 1024, 2048]
            r = [256, 256, 256, 256]
            init_num_filter = 128
            init_filter_size = 7
            init_padding = 3
        elif layers == 131:
            k_r = 160
            G = 40
            k_sec = [4, 8, 28, 3]
            inc_sec = [16, 32, 32, 128]
            bw = [256, 512, 1024, 2048]
            r = [256, 256, 256, 256]
            init_num_filter = 128
            init_filter_size = 7
            init_padding = 3
        else:
            raise NotImplementedError
        net_arg = {
            'k_r': k_r,
            'G': G,
            'k_sec': k_sec,
            'inc_sec': inc_sec,
            'bw': bw,
            'r': r
        }
        net_arg['init_num_filter'] = init_num_filter
        net_arg['init_filter_size'] = init_filter_size
        net_arg['init_padding'] = init_padding

        return net_arg

    def dual_path_factory(self,
                          data,
                          num_1x1_a,
                          num_3x3_b,
                          num_1x1_c,
                          inc,
                          G,
                          _type='normal',
                          name=None):
        """One dual-path unit; returns ``[summ, dense]``.

        ``data`` is the stem tensor for the first unit and the
        ``[summ, dense]`` pair produced by the previous unit afterwards.
        """
        kw = 3
        kh = 3
        pw = (kw - 1) // 2
        ph = (kh - 1) // 2

        # BUGFIX: the original compared strings with ``is``, which tests
        # object identity rather than equality. It only worked through
        # CPython's interning of short literals and emits a
        # SyntaxWarning on Python >= 3.8. Use ``==`` instead.
        if _type == 'proj':
            key_stride = 1
            has_proj = True
        if _type == 'down':
            key_stride = 2
            has_proj = True
        if _type == 'normal':
            key_stride = 1
            has_proj = False

        # PROJ: merge the two incoming paths (if any) for the shortcut.
        if isinstance(data, list):
            data_in = fluid.layers.concat([data[0], data[1]], axis=1)
        else:
            data_in = data

        if has_proj:
            c1x1_w = self.bn_ac_conv(
                data=data_in,
                num_filter=(num_1x1_c + 2 * inc),
                kernel=(1, 1),
                pad=(0, 0),
                stride=(key_stride, key_stride),
                name=name + "_match")
            data_o1, data_o2 = fluid.layers.split(
                c1x1_w,
                num_or_sections=[num_1x1_c, 2 * inc],
                dim=1,
                name=name + "_match_conv_Slice")
        else:
            data_o1 = data[0]
            data_o2 = data[1]

        # MAIN: 1x1 reduce -> grouped 3x3 -> 1x1 expand.
        c1x1_a = self.bn_ac_conv(
            data=data_in,
            num_filter=num_1x1_a,
            kernel=(1, 1),
            pad=(0, 0),
            name=name + "_conv1")
        c3x3_b = self.bn_ac_conv(
            data=c1x1_a,
            num_filter=num_3x3_b,
            kernel=(kw, kh),
            pad=(pw, ph),
            stride=(key_stride, key_stride),
            num_group=G,
            name=name + "_conv2")
        c1x1_c = self.bn_ac_conv(
            data=c3x3_b,
            num_filter=(num_1x1_c + inc),
            kernel=(1, 1),
            pad=(0, 0),
            name=name + "_conv3")

        c1x1_c1, c1x1_c2 = fluid.layers.split(
            c1x1_c,
            num_or_sections=[num_1x1_c, inc],
            dim=1,
            name=name + "_conv3_Slice")

        # OUTPUTS: residual-sum path and densely-concatenated path.
        summ = fluid.layers.elementwise_add(
            x=data_o1, y=c1x1_c1, name=name + "_elewise")
        dense = fluid.layers.concat(
            [data_o2, c1x1_c2], axis=1, name=name + "_concat")

        return [summ, dense]

    def bn_ac_conv(self,
                   data,
                   num_filter,
                   kernel,
                   pad,
                   stride=(1, 1),
                   num_group=1,
                   name=None):
        """Pre-activation unit: BN + ReLU followed by a (grouped) conv."""
        bn_ac = fluid.layers.batch_norm(
            input=data,
            act='relu',
            is_test=False,
            name=name + '.output.1',
            param_attr=ParamAttr(name=name + '_bn_scale'),
            bias_attr=ParamAttr(name + '_bn_offset'),
            moving_mean_name=name + '_bn_mean',
            moving_variance_name=name + '_bn_variance', )
        bn_ac_conv = fluid.layers.conv2d(
            input=bn_ac,
            num_filters=num_filter,
            filter_size=kernel,
            stride=stride,
            padding=pad,
            groups=num_group,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_weights"))
        return bn_ac_conv


def DPN68():
    """DPN with 68 layers."""
    return DPN(layers=68)


def DPN92():
    """DPN with 92 layers."""
    return DPN(layers=92)


def DPN98():
    """DPN with 98 layers."""
    return DPN(layers=98)


def DPN107():
    """DPN with 107 layers."""
    return DPN(layers=107)


def DPN131():
    """DPN with 131 layers."""
    return DPN(layers=131)
# Global hyper-parameters shared by every block of one network.
GlobalParams = collections.namedtuple('GlobalParams', [
    'batch_norm_momentum',
    'batch_norm_epsilon',
    'dropout_rate',
    'num_classes',
    'width_coefficient',
    'depth_coefficient',
    'depth_divisor',
    'min_depth',
    'drop_connect_rate',
])

# Per-block hyper-parameters (one namedtuple per MBConv stage).
BlockArgs = collections.namedtuple('BlockArgs', [
    'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
    'expand_ratio', 'id_skip', 'stride', 'se_ratio'
])

# Every field defaults to None so partially specified tuples are legal.
GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields)


def efficientnet_params(model_name):
    """Map an EfficientNet variant name to its scaling coefficients.

    Returns:
        tuple: (width_coefficient, depth_coefficient, resolution, dropout).
    """
    coefficients = {
        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
    }
    return coefficients[model_name]


def efficientnet(width_coefficient=None,
                 depth_coefficient=None,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2):
    """Create (blocks_args, global_params) for an EfficientNet."""
    encoded_blocks = [
        'r1_k3_s11_e1_i32_o16_se0.25',
        'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25',
        'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25',
        'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    blocks_args = BlockDecoder.decode(encoded_blocks)

    global_params = GlobalParams(
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        dropout_rate=dropout_rate,
        drop_connect_rate=drop_connect_rate,
        num_classes=1000,
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        depth_divisor=8,
        min_depth=None)

    return blocks_args, global_params


def get_model_params(model_name, override_params):
    """Resolve block args and global params for ``model_name``.

    Raises:
        NotImplementedError: if the name is not an efficientnet variant.
    """
    if not model_name.startswith('efficientnet'):
        raise NotImplementedError('model name is not pre-defined: %s' %
                                  model_name)
    w, d, _, p = efficientnet_params(model_name)
    blocks_args, global_params = efficientnet(
        width_coefficient=w, depth_coefficient=d, dropout_rate=p)
    if override_params:
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params


def round_filters(filters, global_params):
    """Scale a channel count by the width multiplier, snapping to the
    depth divisor and never rounding down by more than 10%."""
    scale = global_params.width_coefficient
    if not scale:
        return filters
    divisor = global_params.depth_divisor
    floor = global_params.min_depth or divisor
    scaled = filters * scale
    snapped = max(floor, int(scaled + divisor / 2) // divisor * divisor)
    if snapped < 0.9 * scaled:  # prevent rounding by more than 10%
        snapped += divisor
    return int(snapped)


def round_repeats(repeats, global_params):
    """Scale a block repeat count by the depth multiplier (ceiling)."""
    scale = global_params.depth_coefficient
    if not scale:
        return repeats
    return int(math.ceil(scale * repeats))
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class EfficientNet(): + def __init__(self, + name='b0', + padding_type='SAME', + override_params=None, + is_test=False, + use_se=True): + valid_names = ['b' + str(i) for i in range(8)] + assert name in valid_names, 'efficient name should be in b0~b7' + model_name = 'efficientnet-' + name + self._blocks_args, self._global_params = get_model_params( + model_name, override_params) + self._bn_mom = self._global_params.batch_norm_momentum + self._bn_eps = self._global_params.batch_norm_epsilon + self.is_test = is_test + self.padding_type = padding_type + self.use_se = use_se + + def net(self, input, class_dim=1000, is_test=False): + + conv = self.extract_features(input, is_test=is_test) + + out_channels = round_filters(1280, self._global_params) + conv = self.conv_bn_layer( + conv, + num_filters=out_channels, + filter_size=1, + bn_act='swish', + bn_mom=self._bn_mom, + bn_eps=self._bn_eps, + padding_type=self.padding_type, + name='', + conv_name='_conv_head', + bn_name='_bn1') + + pool = fluid.layers.pool2d( + input=conv, pool_type='avg', global_pooling=True, use_cudnn=False) + + if self._global_params.dropout_rate: + pool = fluid.layers.dropout( + pool, + self._global_params.dropout_rate, + dropout_implementation='upscale_in_train') + + param_attr, bias_attr = init_fc_layer(class_dim, '_fc') + out = fluid.layers.fc(pool, + class_dim, + name='_fc', + param_attr=param_attr, + bias_attr=bias_attr) + return out + + def _drop_connect(self, inputs, prob, is_test): + if is_test: + return inputs + keep_prob = 1.0 - prob + random_tensor = keep_prob + fluid.layers.uniform_random_batch_size_like( + inputs, [-1, 1, 1, 1], min=0., max=1.) 
+ binary_tensor = fluid.layers.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + def _expand_conv_norm(self, inputs, block_args, is_test, name=None): + # Expansion phase + oup = block_args.input_filters * block_args.expand_ratio # number of output channels + + if block_args.expand_ratio != 1: + conv = self.conv_bn_layer( + inputs, + num_filters=oup, + filter_size=1, + bn_act=None, + bn_mom=self._bn_mom, + bn_eps=self._bn_eps, + padding_type=self.padding_type, + name=name, + conv_name=name + '_expand_conv', + bn_name='_bn0') + + return conv + + def _depthwise_conv_norm(self, inputs, block_args, is_test, name=None): + k = block_args.kernel_size + s = block_args.stride + if isinstance(s, list) or isinstance(s, tuple): + s = s[0] + oup = block_args.input_filters * block_args.expand_ratio # number of output channels + + conv = self.conv_bn_layer( + inputs, + num_filters=oup, + filter_size=k, + stride=s, + num_groups=oup, + bn_act=None, + padding_type=self.padding_type, + bn_mom=self._bn_mom, + bn_eps=self._bn_eps, + name=name, + use_cudnn=False, + conv_name=name + '_depthwise_conv', + bn_name='_bn1') + + return conv + + def _project_conv_norm(self, inputs, block_args, is_test, name=None): + final_oup = block_args.output_filters + conv = self.conv_bn_layer( + inputs, + num_filters=final_oup, + filter_size=1, + bn_act=None, + padding_type=self.padding_type, + bn_mom=self._bn_mom, + bn_eps=self._bn_eps, + name=name, + conv_name=name + '_project_conv', + bn_name='_bn2') + return conv + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + num_groups=1, + padding_type="SAME", + conv_act=None, + bn_act='swish', + use_cudnn=True, + use_bn=True, + bn_mom=0.9, + bn_eps=1e-05, + use_bias=False, + name=None, + conv_name=None, + bn_name=None): + conv = conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + 
use_cudnn=use_cudnn, + name=conv_name, + use_bias=use_bias) + + if use_bn == False: + return conv + else: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + return fluid.layers.batch_norm( + input=conv, + act=bn_act, + momentum=bn_mom, + epsilon=bn_eps, + name=bn_name, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + param_attr=param_attr, + bias_attr=bias_attr) + + def _conv_stem_norm(self, inputs, is_test): + out_channels = round_filters(32, self._global_params) + bn = self.conv_bn_layer( + inputs, + num_filters=out_channels, + filter_size=3, + stride=2, + bn_act=None, + bn_mom=self._bn_mom, + padding_type=self.padding_type, + bn_eps=self._bn_eps, + name='', + conv_name='_conv_stem', + bn_name='_bn0') + + return bn + + def mb_conv_block(self, + inputs, + block_args, + is_test=False, + drop_connect_rate=None, + name=None): + # Expansion and Depthwise Convolution + oup = block_args.input_filters * block_args.expand_ratio # number of output channels + has_se = self.use_se and (block_args.se_ratio is not None) and ( + 0 < block_args.se_ratio <= 1) + id_skip = block_args.id_skip # skip connection and drop connect + conv = inputs + if block_args.expand_ratio != 1: + conv = fluid.layers.swish( + self._expand_conv_norm(conv, block_args, is_test, name)) + + conv = fluid.layers.swish( + self._depthwise_conv_norm(conv, block_args, is_test, name)) + + # Squeeze and Excitation + if has_se: + num_squeezed_channels = max( + 1, int(block_args.input_filters * block_args.se_ratio)) + conv = self.se_block(conv, num_squeezed_channels, oup, name) + + conv = self._project_conv_norm(conv, block_args, is_test, name) + + # Skip connection and drop connect + input_filters, output_filters = block_args.input_filters, block_args.output_filters + if id_skip and block_args.stride == 1 and input_filters == output_filters: + if drop_connect_rate: + conv = self._drop_connect(conv, drop_connect_rate, + self.is_test) + conv = 
fluid.layers.elementwise_add(conv, inputs) + + return conv + + def se_block(self, inputs, num_squeezed_channels, oup, name): + x_squeezed = fluid.layers.pool2d( + input=inputs, + pool_type='avg', + global_pooling=True, + use_cudnn=False) + x_squeezed = conv2d( + x_squeezed, + num_filters=num_squeezed_channels, + filter_size=1, + use_bias=True, + padding_type=self.padding_type, + act='swish', + name=name + '_se_reduce') + x_squeezed = conv2d( + x_squeezed, + num_filters=oup, + filter_size=1, + use_bias=True, + padding_type=self.padding_type, + name=name + '_se_expand') + se_out = inputs * fluid.layers.sigmoid(x_squeezed) + return se_out + + def extract_features(self, inputs, is_test): + """ Returns output of the final convolution layer """ + + conv = fluid.layers.swish( + self._conv_stem_norm( + inputs, is_test=is_test)) + + block_args_copy = copy.deepcopy(self._blocks_args) + idx = 0 + block_size = 0 + for block_arg in block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, + self._global_params), + output_filters=round_filters(block_arg.output_filters, + self._global_params), + num_repeat=round_repeats(block_arg.num_repeat, + self._global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + for block_args in self._blocks_args: + + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, + self._global_params), + output_filters=round_filters(block_args.output_filters, + self._global_params), + num_repeat=round_repeats(block_args.num_repeat, + self._global_params)) + + # The first block needs to take care of stride and filter size increase. + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + conv = self.mb_conv_block(conv, block_args, is_test, + drop_connect_rate, + '_blocks.' 
class BlockDecoder(object):
    """Encode/decode BlockArgs to/from compact string notation
    (e.g. 'r1_k3_s11_e1_i32_o16_se0.25'), following the official
    TensorFlow repository, with the encoder fixed to match the
    namedtuple the decoder actually produces."""

    @staticmethod
    def _decode_block_string(block_string):
        """Parse one block string into a BlockArgs namedtuple."""
        assert isinstance(block_string, str)

        ops = block_string.split('_')
        options = {}
        for op in ops:
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Stride must be a single digit, or two identical digits.
        assert (
            ('s' in options and len(options['s']) == 1) or
            (len(options['s']) == 2 and options['s'][0] == options['s'][1]))

        return BlockArgs(
            kernel_size=int(options['k']),
            num_repeat=int(options['r']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            expand_ratio=int(options['e']),
            id_skip=('noskip' not in block_string),
            se_ratio=float(options['se']) if 'se' in options else None,
            stride=[int(options['s'][0])])

    @staticmethod
    def _encode_block_string(block):
        """Encode a BlockArgs to its string form.

        Fixes vs. the original:
        - ``block.strides`` does not exist on BlockArgs (the field is
          ``stride``, stored by the decoder as a one-element list), so
          encoding always raised AttributeError.
        - ``0 < block.se_ratio <= 1`` raised TypeError when se_ratio
          was None; it is now guarded explicitly.
        """
        stride = block.stride
        if isinstance(stride, (list, tuple)):
            stride = stride[0]
        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (stride, stride),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters,
        ]
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """
        Decodes a list of string notations to specify blocks inside the network.

        :param string_list: a list of strings, each string is a notation of block
        :return: a list of BlockArgs namedtuples of block args
        """
        assert isinstance(string_list, list)
        blocks_args = []
        for block_string in string_list:
            blocks_args.append(BlockDecoder._decode_block_string(block_string))
        return blocks_args

    @staticmethod
    def encode(blocks_args):
        """
        Encodes a list of BlockArgs to a list of strings.

        :param blocks_args: a list of BlockArgs namedtuples of block args
        :return: a list of strings, each string is a notation of block
        """
        block_strings = []
        for block in blocks_args:
            block_strings.append(BlockDecoder._encode_block_string(block))
        return block_strings
override_params=override_params, + use_se=use_se) + return model + + +def EfficientNetB5(is_test=False, + padding_type='SAME', + override_params=None, + use_se=True): + model = EfficientNet( + name='b5', + is_test=is_test, + padding_type=padding_type, + override_params=override_params, + use_se=use_se) + return model + + +def EfficientNetB6(is_test=False, + padding_type='SAME', + override_params=None, + use_se=True): + model = EfficientNet( + name='b6', + is_test=is_test, + padding_type=padding_type, + override_params=override_params, + use_se=use_se) + return model + + +def EfficientNetB7(is_test=False, + padding_type='SAME', + override_params=None, + use_se=True): + model = EfficientNet( + name='b7', + is_test=is_test, + padding_type=padding_type, + override_params=override_params, + use_se=use_se) + return model diff --git a/ppcls/modeling/architectures/googlenet.py b/ppcls/modeling/architectures/googlenet.py new file mode 100644 index 000000000..9cf97c65a --- /dev/null +++ b/ppcls/modeling/architectures/googlenet.py @@ -0,0 +1,237 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
class GoogLeNet():
    """GoogLeNet / Inception-v1 (arXiv:1409.4842).

    ``net`` returns three softmax heads: the main classifier plus two
    auxiliary classifiers attached to ince4a and ince4d.
    """

    def __init__(self):
        pass

    def conv_layer(self,
                   input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   act=None,
                   name=None):
        """2-D convolution with Xavier-style uniform init and no bias."""
        channels = input.shape[1]
        stdv = (3.0 / (filter_size**2 * channels))**0.5
        w_attr = ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=name + "_weights")
        return fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=act,
            param_attr=w_attr,
            bias_attr=False,
            name=name)

    def xavier(self, channels, filter_size, name):
        """ParamAttr with Xavier-style uniform initialization."""
        stdv = (3.0 / (filter_size**2 * channels))**0.5
        return ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=name + "_weights")

    def inception(self,
                  input,
                  channels,
                  filter1,
                  filter3R,
                  filter3,
                  filter5R,
                  filter5,
                  proj,
                  name=None):
        """Inception module: 1x1, reduced 3x3, reduced 5x5 and pooled
        projection branches, concatenated channel-wise then ReLU."""
        conv1 = self.conv_layer(
            input=input,
            num_filters=filter1,
            filter_size=1,
            stride=1,
            act=None,
            name="inception_" + name + "_1x1")
        conv3r = self.conv_layer(
            input=input,
            num_filters=filter3R,
            filter_size=1,
            stride=1,
            act=None,
            name="inception_" + name + "_3x3_reduce")
        conv3 = self.conv_layer(
            input=conv3r,
            num_filters=filter3,
            filter_size=3,
            stride=1,
            act=None,
            name="inception_" + name + "_3x3")
        conv5r = self.conv_layer(
            input=input,
            num_filters=filter5R,
            filter_size=1,
            stride=1,
            act=None,
            name="inception_" + name + "_5x5_reduce")
        conv5 = self.conv_layer(
            input=conv5r,
            num_filters=filter5,
            filter_size=5,
            stride=1,
            act=None,
            name="inception_" + name + "_5x5")
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=3,
            pool_stride=1,
            pool_padding=1,
            pool_type='max')
        convprj = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=proj,
            stride=1,
            padding=0,
            name="inception_" + name + "_3x3_proj",
            param_attr=ParamAttr(
                name="inception_" + name + "_3x3_proj_weights"),
            bias_attr=False)
        merged = fluid.layers.concat(
            input=[conv1, conv3, conv5, convprj], axis=1)
        return fluid.layers.relu(merged)

    def _aux_classifier(self, feature, idx, class_dim):
        """Auxiliary softmax head (training-time regularizer)."""
        pool = fluid.layers.pool2d(
            input=feature, pool_size=5, pool_type='avg', pool_stride=3)
        conv = self.conv_layer(
            input=pool,
            num_filters=128,
            filter_size=1,
            stride=1,
            act=None,
            name="conv_o" + idx)
        fc = fluid.layers.fc(input=conv,
                             size=1024,
                             act='relu',
                             param_attr=self.xavier(2048, 1, "fc_o" + idx),
                             name="fc_o" + idx,
                             bias_attr=ParamAttr(name="fc_o" + idx +
                                                 "_offset"))
        dropped = fluid.layers.dropout(x=fc, dropout_prob=0.7)
        return fluid.layers.fc(input=dropped,
                               size=class_dim,
                               act='softmax',
                               param_attr=self.xavier(1024, 1, "out" + idx),
                               name="out" + idx,
                               bias_attr=ParamAttr(name="out" + idx +
                                                   "_offset"))

    def net(self, input, class_dim=1000):
        """Build the GoogLeNet graph; returns [out, out1, out2]."""
        # Stem: conv7x7/2 -> pool -> conv1x1 -> conv3x3 -> pool.
        x = self.conv_layer(
            input=input,
            num_filters=64,
            filter_size=7,
            stride=2,
            act=None,
            name="conv1")
        x = fluid.layers.pool2d(
            input=x, pool_size=3, pool_type='max', pool_stride=2)

        x = self.conv_layer(
            input=x,
            num_filters=64,
            filter_size=1,
            stride=1,
            act=None,
            name="conv2_1x1")
        x = self.conv_layer(
            input=x,
            num_filters=192,
            filter_size=3,
            stride=1,
            act=None,
            name="conv2_3x3")
        x = fluid.layers.pool2d(
            input=x, pool_size=3, pool_type='max', pool_stride=2)

        # Inception stages 3-5.
        ince3a = self.inception(x, 192, 64, 96, 128, 16, 32, 32, "ince3a")
        ince3b = self.inception(ince3a, 256, 128, 128, 192, 32, 96, 64,
                                "ince3b")
        pool3 = fluid.layers.pool2d(
            input=ince3b, pool_size=3, pool_type='max', pool_stride=2)

        ince4a = self.inception(pool3, 480, 192, 96, 208, 16, 48, 64,
                                "ince4a")
        ince4b = self.inception(ince4a, 512, 160, 112, 224, 24, 64, 64,
                                "ince4b")
        ince4c = self.inception(ince4b, 512, 128, 128, 256, 24, 64, 64,
                                "ince4c")
        ince4d = self.inception(ince4c, 512, 112, 144, 288, 32, 64, 64,
                                "ince4d")
        ince4e = self.inception(ince4d, 528, 256, 160, 320, 32, 128, 128,
                                "ince4e")
        pool4 = fluid.layers.pool2d(
            input=ince4e, pool_size=3, pool_type='max', pool_stride=2)

        ince5a = self.inception(pool4, 832, 256, 160, 320, 32, 128, 128,
                                "ince5a")
        ince5b = self.inception(ince5a, 832, 384, 192, 384, 48, 128, 128,
                                "ince5b")
        pool5 = fluid.layers.pool2d(
            input=ince5b, pool_size=7, pool_type='avg', pool_stride=7)

        # Main head.
        dropped = fluid.layers.dropout(x=pool5, dropout_prob=0.4)
        out = fluid.layers.fc(input=dropped,
                              size=class_dim,
                              act='softmax',
                              param_attr=self.xavier(1024, 1, "out"),
                              name="out",
                              bias_attr=ParamAttr(name="out_offset"))

        # Auxiliary heads on ince4a and ince4d.
        out1 = self._aux_classifier(ince4a, "1", class_dim)
        out2 = self._aux_classifier(ince4d, "2", class_dim)

        # last fc layer is "out"
        return [out, out1, out2]
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "HRNet", "HRNet_W18_C", "HRNet_W30_C", "HRNet_W32_C", "HRNet_W40_C", + "HRNet_W44_C", "HRNet_W48_C", "HRNet_W60_C", "HRNet_W64_C", + "SE_HRNet_W18_C", "SE_HRNet_W30_C", "SE_HRNet_W32_C", "SE_HRNet_W40_C", + "SE_HRNet_W44_C", "SE_HRNet_W48_C", "SE_HRNet_W60_C", "SE_HRNet_W64_C" +] + + +class HRNet(): + def __init__(self, width=18, has_se=False): + self.width = width + self.has_se = has_se + self.channels = { + 18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]], + 30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]], + 32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]], + 40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]], + 44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]], + 48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]], + 60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]], + 64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]] + } + + def net(self, input, class_dim=1000): + width = self.width + channels_2, channels_3, channels_4 = self.channels[width] + num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3 + + x = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=64, + stride=2, + if_act=True, + name='layer1_1') + x = self.conv_bn_layer( + input=x, + filter_size=3, + num_filters=64, + stride=2, + if_act=True, + name='layer1_2') + + la1 = self.layer1(x, name='layer2') + 
tr1 = self.transition_layer([la1], [256], channels_2, name='tr1') + st2 = self.stage(tr1, num_modules_2, channels_2, name='st2') + tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2') + st3 = self.stage(tr2, num_modules_3, channels_3, name='st3') + tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3') + st4 = self.stage(tr3, num_modules_4, channels_4, name='st4') + + #classification + last_cls = self.last_cls_out(x=st4, name='cls_head') + y = last_cls[0] + last_num_filters = [256, 512, 1024] + for i in range(3): + y = fluid.layers.elementwise_add( + last_cls[i + 1], + self.conv_bn_layer( + input=y, + filter_size=3, + num_filters=last_num_filters[i], + stride=2, + name='cls_head_add' + str(i + 1))) + + y = self.conv_bn_layer( + input=y, + filter_size=1, + num_filters=2048, + stride=1, + name='cls_head_last_conv') + pool = fluid.layers.pool2d( + input=y, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name='fc_offset')) + return out + + def layer1(self, input, name=None): + conv = input + for i in range(4): + conv = self.bottleneck_block( + conv, + num_filters=64, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1)) + return conv + + def transition_layer(self, x, in_channels, out_channels, name=None): + num_in = len(in_channels) + num_out = len(out_channels) + out = [] + for i in range(num_out): + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.conv_bn_layer( + x[i], + filter_size=3, + num_filters=out_channels[i], + name=name + '_layer_' + str(i + 1)) + out.append(residual) + else: + out.append(x[i]) + else: + residual = self.conv_bn_layer( + x[-1], + filter_size=3, + num_filters=out_channels[i], + stride=2, + name=name + '_layer_' + str(i + 1)) + out.append(residual) + return out + 
+ def branches(self, x, block_num, channels, name=None): + out = [] + for i in range(len(channels)): + residual = x[i] + for j in range(block_num): + residual = self.basic_block( + residual, + channels[i], + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1)) + out.append(residual) + return out + + def fuse_layers(self, x, channels, multi_scale_output=True, name=None): + out = [] + for i in range(len(channels) if multi_scale_output else 1): + residual = x[i] + for j in range(len(channels)): + if j > i: + y = self.conv_bn_layer( + x[j], + filter_size=1, + num_filters=channels[i], + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) + y = fluid.layers.resize_nearest(input=y, scale=2**(j - i)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = x[j] + for k in range(i - j): + if k == i - j - 1: + y = self.conv_bn_layer( + y, + filter_size=3, + num_filters=channels[i], + stride=2, + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + else: + y = self.conv_bn_layer( + y, + filter_size=3, + num_filters=channels[j], + stride=2, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + residual = fluid.layers.relu(residual) + out.append(residual) + return out + + def high_resolution_module(self, + x, + channels, + multi_scale_output=True, + name=None): + residual = self.branches(x, 4, channels, name=name) + out = self.fuse_layers( + residual, + channels, + multi_scale_output=multi_scale_output, + name=name) + return out + + def stage(self, + x, + num_modules, + channels, + multi_scale_output=True, + name=None): + out = x + for i in range(num_modules): + if i == num_modules - 1 and multi_scale_output == False: + out = self.high_resolution_module( + out, + channels, + multi_scale_output=False, + name=name + '_' + str(i + 1)) + else: + out = self.high_resolution_module( 
+ out, channels, name=name + '_' + str(i + 1)) + + return out + + def last_cls_out(self, x, name=None): + out = [] + num_filters_list = [32, 64, 128, 256] + for i in range(len(x)): + out.append( + self.bottleneck_block( + input=x[i], + num_filters=num_filters_list[i], + name=name + 'conv_' + str(i + 1), + downsample=True)) + + return out + + def basic_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv1') + conv = self.conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + if_act=False, + name=name + '_conv2') + if downsample: + residual = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + if_act=False, + name=name + '_downsample') + if self.has_se: + conv = self.squeeze_excitation( + input=conv, + num_channels=num_filters, + reduction_ratio=16, + name=name + '_fc') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def bottleneck_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + name=name + '_conv1') + conv = self.conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv2') + conv = self.conv_bn_layer( + input=conv, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_conv3') + if downsample: + residual = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_downsample') + if self.has_se: + conv = self.squeeze_excitation( + input=conv, + num_channels=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def squeeze_excitation(self, + input, + num_channels, + 
reduction_ratio, + name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + squeeze = fluid.layers.fc( + input=pool, + size=num_channels // reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_sqz_weights'), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) + excitation = fluid.layers.fc( + input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_exc_weights'), + bias_attr=ParamAttr(name=name + '_exc_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + padding=1, + num_groups=1, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr( + name=bn_name + "_scale", + initializer=fluid.initializer.Constant(1.0)), + bias_attr=ParamAttr( + name=bn_name + "_offset", + initializer=fluid.initializer.Constant(0.0)), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + bn = fluid.layers.relu(bn) + return bn + + +def HRNet_W18_C(): + model = HRNet(width=18) + return model + + +def HRNet_W30_C(): + model = HRNet(width=30) + return model + + +def HRNet_W32_C(): + model = HRNet(width=32) + return model + + +def HRNet_W40_C(): + model = HRNet(width=40) + return model + + +def HRNet_W44_C(): + model = HRNet(width=44) + return model + + +def 
HRNet_W48_C(): + model = HRNet(width=48) + return model + + +def HRNet_W60_C(): + model = HRNet(width=60) + return model + + +def HRNet_W64_C(): + model = HRNet(width=64) + return model + + +def SE_HRNet_W18_C(): + model = HRNet(width=18, has_se=True) + return model + + +def SE_HRNet_W30_C(): + model = HRNet(width=30, has_se=True) + return model + + +def SE_HRNet_W32_C(): + model = HRNet(width=32, has_se=True) + return model + + +def SE_HRNet_W40_C(): + model = HRNet(width=40, has_se=True) + return model + + +def SE_HRNet_W44_C(): + model = HRNet(width=44, has_se=True) + return model + + +def SE_HRNet_W48_C(): + model = HRNet(width=48, has_se=True) + return model + + +def SE_HRNet_W60_C(): + model = HRNet(width=60, has_se=True) + return model + + +def SE_HRNet_W64_C(): + model = HRNet(width=64, has_se=True) + return model diff --git a/ppcls/modeling/architectures/inception_v4.py b/ppcls/modeling/architectures/inception_v4.py new file mode 100644 index 000000000..a81d9f49a --- /dev/null +++ b/ppcls/modeling/architectures/inception_v4.py @@ -0,0 +1,354 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['InceptionV4'] + + +class InceptionV4(): + def __init__(self): + + pass + + def net(self, input, class_dim=1000): + x = self.inception_stem(input) + + for i in range(4): + x = self.inceptionA(x, name=str(i + 1)) + x = self.reductionA(x) + + for i in range(7): + x = self.inceptionB(x, name=str(i + 1)) + x = self.reductionB(x) + + for i in range(3): + x = self.inceptionC(x, name=str(i + 1)) + + pool = fluid.layers.pool2d( + input=x, pool_type='avg', global_pooling=True) + + drop = fluid.layers.dropout(x=pool, dropout_prob=0.2) + + stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0) + out = fluid.layers.fc( + input=drop, + size=class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="final_fc_weights"), + bias_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="final_fc_offset")) + return out + + def conv_bn_layer(self, + data, + num_filters, + filter_size, + stride=1, + padding=0, + groups=1, + act='relu', + name=None): + conv = fluid.layers.conv2d( + input=data, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name) + bn_name = name + "_bn" + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def inception_stem(self, data, name=None): + conv = self.conv_bn_layer( + data, 32, 3, stride=2, act='relu', name="conv1_3x3_s2") + conv = self.conv_bn_layer(conv, 32, 3, act='relu', name="conv2_3x3_s1") + conv = self.conv_bn_layer( + conv, 
64, 3, padding=1, act='relu', name="conv3_3x3_s1") + + pool1 = fluid.layers.pool2d( + input=conv, pool_size=3, pool_stride=2, pool_type='max') + conv2 = self.conv_bn_layer( + conv, 96, 3, stride=2, act='relu', name="inception_stem1_3x3_s2") + concat = fluid.layers.concat([pool1, conv2], axis=1) + + conv1 = self.conv_bn_layer( + concat, 64, 1, act='relu', name="inception_stem2_3x3_reduce") + conv1 = self.conv_bn_layer( + conv1, 96, 3, act='relu', name="inception_stem2_3x3") + + conv2 = self.conv_bn_layer( + concat, 64, 1, act='relu', name="inception_stem2_1x7_reduce") + conv2 = self.conv_bn_layer( + conv2, + 64, (7, 1), + padding=(3, 0), + act='relu', + name="inception_stem2_1x7") + conv2 = self.conv_bn_layer( + conv2, + 64, (1, 7), + padding=(0, 3), + act='relu', + name="inception_stem2_7x1") + conv2 = self.conv_bn_layer( + conv2, 96, 3, act='relu', name="inception_stem2_3x3_2") + + concat = fluid.layers.concat([conv1, conv2], axis=1) + + conv1 = self.conv_bn_layer( + concat, + 192, + 3, + stride=2, + act='relu', + name="inception_stem3_3x3_s2") + pool1 = fluid.layers.pool2d( + input=concat, pool_size=3, pool_stride=2, pool_type='max') + + concat = fluid.layers.concat([conv1, pool1], axis=1) + + return concat + + def inceptionA(self, data, name=None): + pool1 = fluid.layers.pool2d( + input=data, pool_size=3, pool_padding=1, pool_type='avg') + conv1 = self.conv_bn_layer( + pool1, 96, 1, act='relu', name="inception_a" + name + "_1x1") + + conv2 = self.conv_bn_layer( + data, 96, 1, act='relu', name="inception_a" + name + "_1x1_2") + + conv3 = self.conv_bn_layer( + data, 64, 1, act='relu', name="inception_a" + name + "_3x3_reduce") + conv3 = self.conv_bn_layer( + conv3, + 96, + 3, + padding=1, + act='relu', + name="inception_a" + name + "_3x3") + + conv4 = self.conv_bn_layer( + data, + 64, + 1, + act='relu', + name="inception_a" + name + "_3x3_2_reduce") + conv4 = self.conv_bn_layer( + conv4, + 96, + 3, + padding=1, + act='relu', + name="inception_a" + name + "_3x3_2") 
+ conv4 = self.conv_bn_layer( + conv4, + 96, + 3, + padding=1, + act='relu', + name="inception_a" + name + "_3x3_3") + + concat = fluid.layers.concat([conv1, conv2, conv3, conv4], axis=1) + + return concat + + def reductionA(self, data, name=None): + pool1 = fluid.layers.pool2d( + input=data, pool_size=3, pool_stride=2, pool_type='max') + + conv2 = self.conv_bn_layer( + data, 384, 3, stride=2, act='relu', name="reduction_a_3x3") + + conv3 = self.conv_bn_layer( + data, 192, 1, act='relu', name="reduction_a_3x3_2_reduce") + conv3 = self.conv_bn_layer( + conv3, 224, 3, padding=1, act='relu', name="reduction_a_3x3_2") + conv3 = self.conv_bn_layer( + conv3, 256, 3, stride=2, act='relu', name="reduction_a_3x3_3") + + concat = fluid.layers.concat([pool1, conv2, conv3], axis=1) + + return concat + + def inceptionB(self, data, name=None): + pool1 = fluid.layers.pool2d( + input=data, pool_size=3, pool_padding=1, pool_type='avg') + conv1 = self.conv_bn_layer( + pool1, 128, 1, act='relu', name="inception_b" + name + "_1x1") + + conv2 = self.conv_bn_layer( + data, 384, 1, act='relu', name="inception_b" + name + "_1x1_2") + + conv3 = self.conv_bn_layer( + data, + 192, + 1, + act='relu', + name="inception_b" + name + "_1x7_reduce") + conv3 = self.conv_bn_layer( + conv3, + 224, (1, 7), + padding=(0, 3), + act='relu', + name="inception_b" + name + "_1x7") + conv3 = self.conv_bn_layer( + conv3, + 256, (7, 1), + padding=(3, 0), + act='relu', + name="inception_b" + name + "_7x1") + + conv4 = self.conv_bn_layer( + data, + 192, + 1, + act='relu', + name="inception_b" + name + "_7x1_2_reduce") + conv4 = self.conv_bn_layer( + conv4, + 192, (1, 7), + padding=(0, 3), + act='relu', + name="inception_b" + name + "_1x7_2") + conv4 = self.conv_bn_layer( + conv4, + 224, (7, 1), + padding=(3, 0), + act='relu', + name="inception_b" + name + "_7x1_2") + conv4 = self.conv_bn_layer( + conv4, + 224, (1, 7), + padding=(0, 3), + act='relu', + name="inception_b" + name + "_1x7_3") + conv4 = 
self.conv_bn_layer( + conv4, + 256, (7, 1), + padding=(3, 0), + act='relu', + name="inception_b" + name + "_7x1_3") + + concat = fluid.layers.concat([conv1, conv2, conv3, conv4], axis=1) + + return concat + + def reductionB(self, data, name=None): + pool1 = fluid.layers.pool2d( + input=data, pool_size=3, pool_stride=2, pool_type='max') + + conv2 = self.conv_bn_layer( + data, 192, 1, act='relu', name="reduction_b_3x3_reduce") + conv2 = self.conv_bn_layer( + conv2, 192, 3, stride=2, act='relu', name="reduction_b_3x3") + + conv3 = self.conv_bn_layer( + data, 256, 1, act='relu', name="reduction_b_1x7_reduce") + conv3 = self.conv_bn_layer( + conv3, + 256, (1, 7), + padding=(0, 3), + act='relu', + name="reduction_b_1x7") + conv3 = self.conv_bn_layer( + conv3, + 320, (7, 1), + padding=(3, 0), + act='relu', + name="reduction_b_7x1") + conv3 = self.conv_bn_layer( + conv3, 320, 3, stride=2, act='relu', name="reduction_b_3x3_2") + + concat = fluid.layers.concat([pool1, conv2, conv3], axis=1) + + return concat + + def inceptionC(self, data, name=None): + pool1 = fluid.layers.pool2d( + input=data, pool_size=3, pool_padding=1, pool_type='avg') + conv1 = self.conv_bn_layer( + pool1, 256, 1, act='relu', name="inception_c" + name + "_1x1") + + conv2 = self.conv_bn_layer( + data, 256, 1, act='relu', name="inception_c" + name + "_1x1_2") + + conv3 = self.conv_bn_layer( + data, 384, 1, act='relu', name="inception_c" + name + "_1x1_3") + conv3_1 = self.conv_bn_layer( + conv3, + 256, (1, 3), + padding=(0, 1), + act='relu', + name="inception_c" + name + "_1x3") + conv3_2 = self.conv_bn_layer( + conv3, + 256, (3, 1), + padding=(1, 0), + act='relu', + name="inception_c" + name + "_3x1") + + conv4 = self.conv_bn_layer( + data, 384, 1, act='relu', name="inception_c" + name + "_1x1_4") + conv4 = self.conv_bn_layer( + conv4, + 448, (1, 3), + padding=(0, 1), + act='relu', + name="inception_c" + name + "_1x3_2") + conv4 = self.conv_bn_layer( + conv4, + 512, (3, 1), + padding=(1, 0), + 
act='relu', + name="inception_c" + name + "_3x1_2") + conv4_1 = self.conv_bn_layer( + conv4, + 256, (1, 3), + padding=(0, 1), + act='relu', + name="inception_c" + name + "_1x3_3") + conv4_2 = self.conv_bn_layer( + conv4, + 256, (3, 1), + padding=(1, 0), + act='relu', + name="inception_c" + name + "_3x1_3") + + concat = fluid.layers.concat( + [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1) + + return concat diff --git a/ppcls/modeling/architectures/layers.py b/ppcls/modeling/architectures/layers.py new file mode 100644 index 000000000..f99103b05 --- /dev/null +++ b/ppcls/modeling/architectures/layers.py @@ -0,0 +1,250 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import warnings + +import paddle.fluid as fluid + + +def initial_type(name, + input, + op_type, + fan_out, + init="google", + use_bias=False, + filter_size=0, + stddev=0.02): + if init == "kaiming": + if op_type == 'conv': + fan_in = input.shape[1] * filter_size * filter_size + elif op_type == 'deconv': + fan_in = fan_out * filter_size * filter_size + else: + if len(input.shape) > 2: + fan_in = input.shape[1] * input.shape[2] * input.shape[3] + else: + fan_in = input.shape[1] + bound = 1 / math.sqrt(fan_in) + param_attr = fluid.ParamAttr( + name=name + "_weights", + initializer=fluid.initializer.Uniform( + low=-bound, high=bound)) + if use_bias == True: + bias_attr = fluid.ParamAttr( + name=name + '_offset', + initializer=fluid.initializer.Uniform( + low=-bound, high=bound)) + else: + bias_attr = False + elif init == 'google': + n = filter_size * filter_size * fan_out + param_attr = fluid.ParamAttr( + name=name + "_weights", + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=math.sqrt(2.0 / n))) + if use_bias == True: + bias_attr = fluid.ParamAttr( + name=name + "_offset", + initializer=fluid.initializer.Constant(0.0)) + else: + bias_attr = False + + else: + param_attr = fluid.ParamAttr( + name=name + "_weights", + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=stddev)) + if use_bias == True: + bias_attr = fluid.ParamAttr( + name=name + "_offset", + initializer=fluid.initializer.Constant(0.0)) + else: + bias_attr = False + return param_attr, bias_attr + + +def cal_padding(img_size, stride, filter_size, dilation=1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +def init_batch_norm_layer(name="batch_norm"): + param_attr = 
fluid.ParamAttr( + name=name + '_scale', initializer=fluid.initializer.Constant(1.0)) + bias_attr = fluid.ParamAttr( + name=name + '_offset', + initializer=fluid.initializer.Constant(value=0.0)) + return param_attr, bias_attr + + +def init_fc_layer(fout, name='fc'): + n = fout # fan-out + init_range = 1.0 / math.sqrt(n) + + param_attr = fluid.ParamAttr( + name=name + '_weights', + initializer=fluid.initializer.UniformInitializer( + low=-init_range, high=init_range)) + bias_attr = fluid.ParamAttr( + name=name + '_offset', + initializer=fluid.initializer.Constant(value=0.0)) + return param_attr, bias_attr + + +def norm_layer(input, norm_type='batch_norm', name=None): + if norm_type == 'batch_norm': + param_attr = fluid.ParamAttr( + name=name + '_weights', + initializer=fluid.initializer.Constant(1.0)) + bias_attr = fluid.ParamAttr( + name=name + '_offset', + initializer=fluid.initializer.Constant(value=0.0)) + return fluid.layers.batch_norm( + input, + param_attr=param_attr, + bias_attr=bias_attr, + moving_mean_name=name + '_mean', + moving_variance_name=name + '_variance') + + elif norm_type == 'instance_norm': + helper = fluid.layer_helper.LayerHelper("instance_norm", **locals()) + dtype = helper.input_dtype() + epsilon = 1e-5 + mean = fluid.layers.reduce_mean(input, dim=[2, 3], keep_dim=True) + var = fluid.layers.reduce_mean( + fluid.layers.square(input - mean), dim=[2, 3], keep_dim=True) + if name is not None: + scale_name = name + "_scale" + offset_name = name + "_offset" + scale_param = fluid.ParamAttr( + name=scale_name, + initializer=fluid.initializer.Constant(1.0), + trainable=True) + offset_param = fluid.ParamAttr( + name=offset_name, + initializer=fluid.initializer.Constant(0.0), + trainable=True) + scale = helper.create_parameter( + attr=scale_param, shape=input.shape[1:2], dtype=dtype) + offset = helper.create_parameter( + attr=offset_param, shape=input.shape[1:2], dtype=dtype) + + tmp = fluid.layers.elementwise_mul(x=(input - mean), y=scale, axis=1) + 
tmp = tmp / fluid.layers.sqrt(var + epsilon) + tmp = fluid.layers.elementwise_add(tmp, offset, axis=1) + return tmp + else: + raise NotImplementedError("norm type: [%s] is not supported" % norm_type) + + +def conv2d(input, + num_filters=64, + filter_size=7, + stride=1, + stddev=0.02, + padding=0, + groups=None, + name="conv2d", + norm=None, + act=None, + relufactor=0.0, + use_bias=False, + padding_type=None, + initial="normal", + use_cudnn=True): + + if padding != 0 and padding_type != None: + warnings.warn( + 'padding value and padding type are set in the same time, and the final padding width and padding height are computed by padding_type' + ) + + param_attr, bias_attr = initial_type( + name=name, + input=input, + op_type='conv', + fan_out=num_filters, + init=initial, + use_bias=use_bias, + filter_size=filter_size, + stddev=stddev) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(input.shape[2], stride, + filter_size) + left_padding, right_padding = cal_padding(input.shape[2], stride, + filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding + + conv = fluid.layers.conv2d( + input, + num_filters, + filter_size, + groups=groups, + name=name, + stride=stride, + padding=padding, + use_cudnn=use_cudnn, + param_attr=param_attr, + bias_attr=bias_attr) + + if need_crop: + conv = conv[:, :, 1:, 1:] + + if norm is not None: + conv = 
norm_layer(input=conv, norm_type=norm, name=name + "_norm") + if act == 'relu': + conv = fluid.layers.relu(conv, name=name + '_relu') + elif act == 'leaky_relu': + conv = fluid.layers.leaky_relu( + conv, alpha=relufactor, name=name + '_leaky_relu') + elif act == 'tanh': + conv = fluid.layers.tanh(conv, name=name + '_tanh') + elif act == 'sigmoid': + conv = fluid.layers.sigmoid(conv, name=name + '_sigmoid') + elif act == 'swish': + conv = fluid.layers.swish(conv, name=name + '_swish') + elif act == None: + conv = conv + else: + raise NotImplementedError("activation: [%s] is not support" % act) + + return conv diff --git a/ppcls/modeling/architectures/mobilenet_v1.py b/ppcls/modeling/architectures/mobilenet_v1.py new file mode 100644 index 000000000..b968a9161 --- /dev/null +++ b/ppcls/modeling/architectures/mobilenet_v1.py @@ -0,0 +1,218 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    'MobileNetV1', 'MobileNetV1_x0_25', 'MobileNetV1_x0_5', 'MobileNetV1_x1_0',
    'MobileNetV1_x0_75'
]


class MobileNetV1():
    """MobileNet v1 backbone built from depthwise-separable convolutions.

    ``scale`` is the channel-width multiplier applied to every layer.
    """

    # One row per depthwise-separable block, in forward order:
    # (num_filters1, num_filters2, num_groups, stride, layer name).
    _BLOCK_SETTINGS = [
        (32, 64, 32, 1, "conv2_1"),
        (64, 128, 64, 2, "conv2_2"),
        (128, 128, 128, 1, "conv3_1"),
        (128, 256, 128, 2, "conv3_2"),
        (256, 256, 256, 1, "conv4_1"),
        (256, 512, 256, 2, "conv4_2"),
        (512, 512, 512, 1, "conv5_1"),
        (512, 512, 512, 1, "conv5_2"),
        (512, 512, 512, 1, "conv5_3"),
        (512, 512, 512, 1, "conv5_4"),
        (512, 512, 512, 1, "conv5_5"),
        (512, 1024, 512, 2, "conv5_6"),
        (1024, 1024, 1024, 1, "conv6"),
    ]

    def __init__(self, scale=1.0):
        self.scale = scale

    def net(self, input, class_dim=1000):
        """Build the network on ``input`` and return the class logits."""
        scale = self.scale

        # Stem: plain 3x3 convolution, stride 2 (224 -> 112).
        input = self.conv_bn_layer(
            input,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1,
            name="conv1")

        # Body: the fixed sequence of depthwise-separable blocks above
        # (spatial resolution 112 -> 7).
        for f1, f2, groups, stride, block_name in self._BLOCK_SETTINGS:
            input = self.depthwise_separable(
                input,
                num_filters1=f1,
                num_filters2=f2,
                num_groups=groups,
                stride=stride,
                scale=scale,
                name=block_name)

        # Head: global average pooling followed by one FC classifier.
        input = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True)
        output = fluid.layers.fc(input=input,
                                 size=class_dim,
                                 param_attr=ParamAttr(
                                     initializer=MSRA(), name="fc7_weights"),
                                 bias_attr=ParamAttr(name="fc7_offset"))
        return output

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      channels=None,
                      num_groups=1,
                      act='relu',
                      use_cudnn=True,
                      name=None):
        """Conv2d (bias-free, MSRA init) followed by batch norm.

        ``channels`` is accepted for call-site symmetry but never read; the
        conv op infers its input channel count on its own.
        """
        conv_out = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(
                initializer=MSRA(), name=name + "_weights"),
            bias_attr=False)
        bn_name = name + "_bn"
        return fluid.layers.batch_norm(
            input=conv_out,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def depthwise_separable(self,
                            input,
                            num_filters1,
                            num_filters2,
                            num_groups,
                            stride,
                            scale,
                            name=None):
        """One depthwise 3x3 conv + pointwise 1x1 conv pair."""
        # Depthwise stage: groups == channels, cuDNN disabled because it is
        # slower than the plain implementation for depthwise convs here.
        dw_out = self.conv_bn_layer(
            input=input,
            filter_size=3,
            num_filters=int(num_filters1 * scale),
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            use_cudnn=False,
            name=name + "_dw")
        # Pointwise stage mixes channels with a 1x1 conv.
        pw_out = self.conv_bn_layer(
            input=dw_out,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0,
            name=name + "_sep")
        return pw_out


def MobileNetV1_x0_25():
    return MobileNetV1(scale=0.25)


def MobileNetV1_x0_5():
    return MobileNetV1(scale=0.5)


def MobileNetV1_x1_0():
    return MobileNetV1(scale=1.0)


def MobileNetV1_x0_75():
    return MobileNetV1(scale=0.75)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

# BUGFIX: a comma was missing after 'MobileNetV2_x0_5', so Python's implicit
# string concatenation exported the bogus name 'MobileNetV2_x0_5MobileNetV2_x0_75'
# and both real names were absent from the public API.
__all__ = [
    'MobileNetV2_x0_25', 'MobileNetV2_x0_5', 'MobileNetV2_x0_75',
    'MobileNetV2_x1_0', 'MobileNetV2_x1_5', 'MobileNetV2_x2_0', 'MobileNetV2'
]


class MobileNetV2():
    """MobileNet v2 backbone (inverted residuals with linear bottlenecks).

    ``scale`` is the channel-width multiplier applied to every layer.
    """

    def __init__(self, scale=1.0):
        self.scale = scale

    def net(self, input, class_dim=1000):
        """Build the network on ``input`` and return the class logits."""
        scale = self.scale
        # Per stage: (expansion factor t, output channels c,
        #             repeat count n, first-block stride s).
        bottleneck_params_list = [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ]

        #conv1: stride-2 stem convolution
        input = self.conv_bn_layer(
            input,
            num_filters=int(32 * scale),
            filter_size=3,
            stride=2,
            padding=1,
            if_act=True,
            name='conv1_1')

        # bottleneck sequences; `i` tracks the stage index for layer naming
        i = 1
        in_c = int(32 * scale)
        for layer_setting in bottleneck_params_list:
            t, c, n, s = layer_setting
            i += 1
            input = self.invresi_blocks(
                input=input,
                in_c=in_c,
                t=t,
                c=int(c * scale),
                n=n,
                s=s,
                name='conv' + str(i))
            in_c = int(c * scale)
        #last_conv: 1x1 expansion before the classifier; never narrower
        # than 1280 channels even for scale < 1.0
        input = self.conv_bn_layer(
            input=input,
            num_filters=int(1280 * scale) if scale > 1.0 else 1280,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            name='conv9')

        input = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True)

        output = fluid.layers.fc(input=input,
                                 size=class_dim,
                                 param_attr=ParamAttr(name='fc10_weights'),
                                 bias_attr=ParamAttr(name='fc10_offset'))
        return output

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      channels=None,
                      num_groups=1,
                      if_act=True,
                      name=None,
                      use_cudnn=True):
        """Conv2d (bias-free) + batch norm, optionally followed by ReLU6.

        ``channels`` is accepted for call-site symmetry but never read.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            # MobileNet v2 uses ReLU6 everywhere an activation appears.
            return fluid.layers.relu6(bn)
        else:
            return bn

    def shortcut(self, input, data_residual):
        """Identity residual connection (element-wise add)."""
        return fluid.layers.elementwise_add(input, data_residual)

    def inverted_residual_unit(self,
                               input,
                               num_in_filter,
                               num_filters,
                               ifshortcut,
                               stride,
                               filter_size,
                               padding,
                               expansion_factor,
                               name=None):
        """One inverted-residual block: expand 1x1 -> depthwise -> linear 1x1.

        The final 1x1 projection is linear (no activation); the identity
        shortcut is added only when ``ifshortcut`` is True.
        """
        num_expfilter = int(round(num_in_filter * expansion_factor))

        channel_expand = self.conv_bn_layer(
            input=input,
            num_filters=num_expfilter,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            name=name + '_expand')

        bottleneck_conv = self.conv_bn_layer(
            input=channel_expand,
            num_filters=num_expfilter,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            num_groups=num_expfilter,
            if_act=True,
            name=name + '_dwise',
            use_cudnn=False)

        linear_out = self.conv_bn_layer(
            input=bottleneck_conv,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=False,
            name=name + '_linear')
        if ifshortcut:
            out = self.shortcut(input=input, data_residual=linear_out)
            return out
        else:
            return linear_out

    def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
        """Stack of ``n`` inverted-residual units; only the first may stride."""
        first_block = self.inverted_residual_unit(
            input=input,
            num_in_filter=in_c,
            num_filters=c,
            ifshortcut=False,
            stride=s,
            filter_size=3,
            padding=1,
            expansion_factor=t,
            name=name + '_1')

        last_residual_block = first_block
        last_c = c

        for i in range(1, n):
            last_residual_block = self.inverted_residual_unit(
                input=last_residual_block,
                num_in_filter=last_c,
                num_filters=c,
                ifshortcut=True,
                stride=1,
                filter_size=3,
                padding=1,
                expansion_factor=t,
                name=name + '_' + str(i + 1))
        return last_residual_block


def MobileNetV2_x0_25():
    model = MobileNetV2(scale=0.25)
    return model


def MobileNetV2_x0_5():
    model = MobileNetV2(scale=0.5)
    return model


def MobileNetV2_x0_75():
    model = MobileNetV2(scale=0.75)
    return model


def MobileNetV2_x1_0():
    model = MobileNetV2(scale=1.0)
    return model


def MobileNetV2_x1_5():
    model = MobileNetV2(scale=1.5)
    return model


def MobileNetV2_x2_0():
    model = MobileNetV2(scale=2.0)
    return model
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
    'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0',
    'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35',
    'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
    'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
]


class MobileNetV3():
    """MobileNet v3 backbone in its 'large' and 'small' configurations.

    ``scale`` is the channel-width multiplier; every width is rounded with
    :meth:`make_divisible` so channel counts stay multiples of 8.
    """

    def __init__(self, scale=1.0, model_name='small'):
        self.scale = scale
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', 2],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 2],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', 2],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', 2],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', 2],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

    def net(self, input, class_dim=1000):
        """Build the network on ``input`` and return the class logits."""
        scale = self.scale
        inplanes = self.inplanes
        cfg = self.cfg
        cls_ch_squeeze = self.cls_ch_squeeze
        cls_ch_expand = self.cls_ch_expand
        #conv1: hard-swish stem, stride 2
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(inplanes * scale),
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        i = 0
        inplanes = self.make_divisible(inplanes * scale)
        for layer_cfg in cfg:
            # layer_cfg = [kernel, expansion, out channels, use SE, act, stride]
            conv = self.residual_unit(
                input=conv,
                num_in_filter=inplanes,
                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
                act=layer_cfg[4],
                stride=layer_cfg[5],
                filter_size=layer_cfg[0],
                use_se=layer_cfg[3],
                name='conv' + str(i + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])
            i += 1

        # Head: 1x1 squeeze conv, global pool, 1x1 expand conv, dropout, FC.
        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * cls_ch_squeeze),
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')
        conv = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv = fluid.layers.conv2d(
            input=conv,
            num_filters=cls_ch_expand,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(name='last_1x1_conv_weights'),
            bias_attr=False)
        conv = fluid.layers.hard_swish(conv)
        drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
        out = fluid.layers.fc(input=drop,
                              size=class_dim,
                              param_attr=ParamAttr(name='fc_weights'),
                              bias_attr=ParamAttr(name='fc_offset'))
        return out

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      act=None,
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
        """Conv2d (bias-free) + batch norm + optional activation.

        The BN scale/offset carry an explicit zero L2 regularizer so global
        weight decay does not touch them.  ``res_last_bn_init`` is accepted
        for call-site compatibility but currently unused.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
            else:
                # BUGFIX: an unrecognized activation used to be silently
                # skipped here; fail loudly instead.
                raise NotImplementedError(
                    "activation: [%s] is not support" % act)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        """Round ``v`` to the nearest multiple of ``divisor``.

        The result never falls below ``min_value`` (defaults to ``divisor``)
        and never drops more than 10% below ``v``.
        """
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        """Squeeze-and-excitation: global pool -> FC(relu) -> FC(hard_sigmoid),
        then channel-wise rescale of ``input``."""
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        # Broadcast the per-channel gates over H and W.
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_mid_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      act=None,
                      use_se=False,
                      name=None):
        """MobileNet v3 block: expand 1x1 -> depthwise -> (SE) -> linear 1x1.

        The identity shortcut is added only when the spatial size and channel
        count are unchanged (stride 1 and matching filters).
        """
        conv0 = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_mid_filter,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + '_expand')

        conv1 = self.conv_bn_layer(
            input=conv0,
            filter_size=filter_size,
            num_filters=num_mid_filter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            act=act,
            num_groups=num_mid_filter,
            use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            conv1 = self.se_block(
                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')

        conv2 = self.conv_bn_layer(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear',
            res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return conv2
        else:
            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)


def MobileNetV3_small_x0_35():
    model = MobileNetV3(model_name='small', scale=0.35)
    return model


def MobileNetV3_small_x0_5():
    model = MobileNetV3(model_name='small', scale=0.5)
    return model


def MobileNetV3_small_x0_75():
    model = MobileNetV3(model_name='small', scale=0.75)
    return model


def MobileNetV3_small_x1_0():
    model = MobileNetV3(model_name='small', scale=1.0)
    return model


def MobileNetV3_small_x1_25():
    model = MobileNetV3(model_name='small', scale=1.25)
    return model


def MobileNetV3_large_x0_35():
    model = MobileNetV3(model_name='large', scale=0.35)
    return model


def MobileNetV3_large_x0_5():
    model = MobileNetV3(model_name='large', scale=0.5)
    return model


def MobileNetV3_large_x0_75():
    model = MobileNetV3(model_name='large', scale=0.75)
    return model


def MobileNetV3_large_x1_0():
    model = MobileNetV3(model_name='large', scale=1.0)
    return model


def MobileNetV3_large_x1_25():
    model = MobileNetV3(model_name='large', scale=1.25)
    return model
MobileNetV3(model_name='large', scale=1.25) + return model diff --git a/ppcls/modeling/architectures/model_libs.py b/ppcls/modeling/architectures/model_libs.py new file mode 100644 index 000000000..49d708eee --- /dev/null +++ b/ppcls/modeling/architectures/model_libs.py @@ -0,0 +1,143 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.fluid as fluid +import contextlib + +bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) +name_scope = "" + + +@contextlib.contextmanager +def scope(name): + global name_scope + bk = name_scope + name_scope = name_scope + name + '/' + yield + name_scope = bk + + +def max_pool(input, kernel, stride, padding): + data = fluid.layers.pool2d( + input, + pool_size=kernel, + pool_type='max', + pool_stride=stride, + pool_padding=padding) + return data + + +def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): + N, C, H, W = input.shape + if C % G != 0: + # print "group can not divide channle:", C, G + for d in range(10): + for t in [d, -d]: + if G + t <= 0: continue + if C % (G + t) == 0: + G = G + t + break + if C % G == 0: + # print "use group size:", G + break + assert C % G == 0 + x = fluid.layers.group_norm( + input, + groups=G, + param_attr=param_attr, + bias_attr=bias_attr, + name=name_scope + 
'group_norm') + return x + + +def bn(*args, **kargs): + with scope('BatchNorm'): + return fluid.layers.batch_norm( + *args, + epsilon=1e-3, + momentum=0.99, + param_attr=fluid.ParamAttr( + name=name_scope + 'gamma', regularizer=bn_regularizer), + bias_attr=fluid.ParamAttr( + name=name_scope + 'beta', regularizer=bn_regularizer), + moving_mean_name=name_scope + 'moving_mean', + moving_variance_name=name_scope + 'moving_variance', + **kargs) + + +def bn_relu(data): + return fluid.layers.relu(bn(data)) + + +def relu(data): + return fluid.layers.relu(data) + + +def conv(*args, **kargs): + kargs['param_attr'] = name_scope + 'weights' + if 'bias_attr' in kargs and kargs['bias_attr']: + kargs['bias_attr'] = fluid.ParamAttr( + name=name_scope + 'biases', + regularizer=None, + initializer=fluid.initializer.ConstantInitializer(value=0.0)) + else: + kargs['bias_attr'] = False + return fluid.layers.conv2d(*args, **kargs) + + +def deconv(*args, **kargs): + kargs['param_attr'] = name_scope + 'weights' + if 'bias_attr' in kargs and kargs['bias_attr']: + kargs['bias_attr'] = name_scope + 'biases' + else: + kargs['bias_attr'] = False + return fluid.layers.conv2d_transpose(*args, **kargs) + + +def seperate_conv(input, channel, stride, filter, dilation=1, act=None): + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0), + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.33)) + with scope('depthwise'): + input = conv( + input, + input.shape[1], + filter, + stride, + groups=input.shape[1], + padding=(filter // 2) * dilation, + dilation=dilation, + use_cudnn=False, + param_attr=param_attr) + input = bn(input) + if act: input = act(input) + + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=None, + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) + with scope('pointwise'): + input = conv( + input, channel, 1, 1, groups=1, padding=0, 
param_attr=param_attr) + input = bn(input) + if act: input = act(input) + return input diff --git a/ppcls/modeling/architectures/res2net.py b/ppcls/modeling/architectures/res2net.py new file mode 100644 index 000000000..e6b118036 --- /dev/null +++ b/ppcls/modeling/architectures/res2net.py @@ -0,0 +1,225 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "Res2Net", "Res2Net50_48w_2s", "Res2Net50_26w_4s", "Res2Net50_14w_8s", + "Res2Net50_26w_6s", "Res2Net50_26w_8s", "Res2Net101_26w_4s", + "Res2Net152_26w_4s" +] + + +class Res2Net(): + def __init__(self, layers=50, scales=4, width=26): + self.layers = layers + self.scales = scales + self.width = width + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + basic_width = self.width * self.scales + num_filters1 = [basic_width * t for t in [1, 2, 4, 8]] + num_filters2 = [256 * t for t in [1, 2, 4, 8]] + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + conv = self.conv_bn_layer( + 
input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="conv1") + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters1=num_filters1[block], + num_filters2=num_filters2[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) + pool = fluid.layers.pool2d( + input=conv, + pool_size=7, + pool_stride=1, + pool_type='avg', + global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def 
bottleneck_block(self, input, num_filters1, num_filters2, stride, + name): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters1, + filter_size=1, + stride=1, + act='relu', + name=name + '_branch2a') + xs = fluid.layers.split(conv0, self.scales, 1) + ys = [] + for s in range(self.scales - 1): + if s == 0 or stride == 2: + ys.append( + self.conv_bn_layer( + input=xs[s], + num_filters=num_filters1 // self.scales, + stride=stride, + filter_size=3, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + else: + ys.append( + self.conv_bn_layer( + input=xs[s] + ys[-1], + num_filters=num_filters1 // self.scales, + stride=stride, + filter_size=3, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + if stride == 1: + ys.append(xs[-1]) + else: + ys.append( + fluid.layers.pool2d( + input=xs[-1], + pool_size=3, + pool_stride=stride, + pool_padding=1, + pool_type='avg')) + + conv1 = fluid.layers.concat(ys, axis=1) + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters2, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, num_filters2, stride, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def Res2Net50_48w_2s(): + model = Res2Net(layers=50, scales=2, width=48) + return model + + +def Res2Net50_26w_4s(): + model = Res2Net(layers=50, scales=4, width=26) + return model + + +def Res2Net50_14w_8s(): + model = Res2Net(layers=50, scales=8, width=14) + return model + + +def Res2Net50_26w_6s(): + model = Res2Net(layers=50, scales=6, width=26) + return model + + +def Res2Net50_26w_8s(): + model = Res2Net(layers=50, scales=8, width=26) + return model + + +def Res2Net101_26w_4s(): + model = Res2Net(layers=101, scales=4, width=26) + return model + + +def Res2Net152_26w_4s(): + model = Res2Net(layers=152, scales=4, width=26) + return model diff --git a/ppcls/modeling/architectures/res2net_vd.py b/ppcls/modeling/architectures/res2net_vd.py new file mode 100644 
index 000000000..5e3639749 --- /dev/null +++ b/ppcls/modeling/architectures/res2net_vd.py @@ -0,0 +1,294 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "Res2Net_vd", "Res2Net50_vd_48w_2s", "Res2Net50_vd_26w_4s", + "Res2Net50_vd_14w_8s", "Res2Net50_vd_26w_6s", "Res2Net50_vd_26w_8s", + "Res2Net101_vd_26w_4s", "Res2Net152_vd_26w_4s", "Res2Net200_vd_26w_4s" +] + + +class Res2Net_vd(): + def __init__(self, layers=50, scales=4, width=26): + self.layers = layers + self.scales = scales + self.width = width + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + basic_width = self.width * self.scales + num_filters1 = [basic_width * t for t in [1, 2, 4, 8]] + num_filters2 = [256 * t for t in [1, 2, 4, 8]] + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + conv = self.conv_bn_layer( + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') + conv = 
self.conv_bn_layer( + input=conv, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') + conv = self.conv_bn_layer( + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters1=num_filters1[block], + num_filters2=num_filters2[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + name=conv_name) + pool = fluid.layers.pool2d( + input=conv, + pool_size=7, + pool_stride=1, + pool_type='avg', + global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, + input, + num_filters, + 
filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d( + input=input, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg', + ceil_mode=True) + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new( + input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters1, num_filters2, stride, name, + if_first): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters1, + filter_size=1, + stride=1, + act='relu', + name=name + '_branch2a') + + xs = fluid.layers.split(conv0, self.scales, 1) + ys = [] + for s in range(self.scales - 1): + if s == 0 or stride == 2: + ys.append( + self.conv_bn_layer( + input=xs[s], + num_filters=num_filters1 // self.scales, + stride=stride, + filter_size=3, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + else: + ys.append( + self.conv_bn_layer( + input=xs[s] + ys[-1], + num_filters=num_filters1 // self.scales, + stride=stride, + filter_size=3, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + + if stride == 1: + ys.append(xs[-1]) + else: + ys.append( + fluid.layers.pool2d( + input=xs[-1], + 
pool_size=3, + pool_stride=stride, + pool_padding=1, + pool_type='avg')) + + conv1 = fluid.layers.concat(ys, axis=1) + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters2, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, + num_filters2, + stride, + if_first=if_first, + name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def Res2Net50_vd_48w_2s(): + model = Res2Net_vd(layers=50, scales=2, width=48) + return model + + +def Res2Net50_vd_26w_4s(): + model = Res2Net_vd(layers=50, scales=4, width=26) + return model + + +def Res2Net50_vd_14w_8s(): + model = Res2Net_vd(layers=50, scales=8, width=14) + return model + + +def Res2Net50_vd_26w_6s(): + model = Res2Net_vd(layers=50, scales=6, width=26) + return model + + +def Res2Net50_vd_26w_8s(): + model = Res2Net_vd(layers=50, scales=8, width=26) + return model + + +def Res2Net101_vd_26w_4s(): + model = Res2Net_vd(layers=101, scales=4, width=26) + return model + + +def Res2Net152_vd_26w_4s(): + model = Res2Net_vd(layers=152, scales=4, width=26) + return model + + +def Res2Net200_vd_26w_4s(): + model = Res2Net_vd(layers=200, scales=4, width=26) + return model diff --git a/ppcls/modeling/architectures/resnet.py b/ppcls/modeling/architectures/resnet.py new file mode 100644 index 000000000..1480025b9 --- /dev/null +++ b/ppcls/modeling/architectures/resnet.py @@ -0,0 +1,240 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152" +] + + +class ResNet(): + def __init__(self, layers=50): + self.layers = layers + + def net(self, input, class_dim=1000, data_format="NCHW"): + layers = self.layers + supported_layers = [18, 34, 50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="conv1", + data_format=data_format) + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max', + data_format=data_format) + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name, + data_format=data_format) + + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + 
is_first=block == i == 0, + name=conv_name, + data_format=data_format) + + pool = fluid.layers.pool2d( + input=conv, + pool_type='avg', + global_pooling=True, + data_format=data_format) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name="fc_0.w_0", + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_0.b_0")) + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None, + data_format='NCHW'): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1', + data_format=data_format) + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + data_layout=data_format) + + def shortcut(self, input, ch_out, stride, is_first, name, data_format): + if data_format == 'NCHW': + ch_in = input.shape[1] + else: + ch_in = input.shape[-1] + if ch_in != ch_out or stride != 1 or is_first == True: + return self.conv_bn_layer( + input, ch_out, 1, stride, name=name, data_format=data_format) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, data_format): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a", + data_format=data_format) + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b", 
+ data_format=data_format) + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c", + data_format=data_format) + + short = self.shortcut( + input, + num_filters * 4, + stride, + is_first=False, + name=name + "_branch1", + data_format=data_format) + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") + + def basic_block(self, input, num_filters, stride, is_first, name, + data_format): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name + "_branch2a", + data_format=data_format) + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b", + data_format=data_format) + short = self.shortcut( + input, + num_filters, + stride, + is_first, + name=name + "_branch1", + data_format=data_format) + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + +def ResNet18(): + model = ResNet(layers=18) + return model + + +def ResNet34(): + model = ResNet(layers=34) + return model + + +def ResNet50(): + model = ResNet(layers=50) + return model + + +def ResNet101(): + model = ResNet(layers=101) + return model + + +def ResNet152(): + model = ResNet(layers=152) + return model diff --git a/ppcls/modeling/architectures/resnet_acnet.py b/ppcls/modeling/architectures/resnet_acnet.py new file mode 100644 index 000000000..e17046f0e --- /dev/null +++ b/ppcls/modeling/architectures/resnet_acnet.py @@ -0,0 +1,332 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = [
+    "ResNetACNet", "ResNet18_ACNet", "ResNet34_ACNet", "ResNet50_ACNet",
+    "ResNet101_ACNet", "ResNet152_ACNet"
+]
+
+
+class ResNetACNet(object):
+    """ ACNet """
+
+    def __init__(self, layers=50, deploy=False):
+        """init"""
+        self.layers = layers
+        self.deploy = deploy
+
+    def net(self, input, class_dim=1000):
+        """model"""
+        layers = self.layers
+        supported_layers = [18, 34, 50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, layers)
+
+        if layers == 18:
+            depth = [2, 2, 2, 2]
+        elif layers == 34 or layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_filters = [64, 128, 256, 512]
+
+        conv = self.conv_bn_layer(
+            input=input,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu',
+            name="conv1")
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+        if layers >= 50:
+            for block in range(len(depth)):
+                for i in range(depth[block]):
+                    if layers in [101, 152] and block == 2:
+                        if i == 0:
+                            conv_name = "res" + str(block + 2) + "a"
+                        else:
+                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                    else:
+                        conv_name = "res" + str(block + 2) + chr(97 + i)
+                    conv = self.bottleneck_block(
+                        input=conv,
+
num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + is_first=block == i == 0, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) + return out + + def conv_bn_layer(self, **kwargs): + """ + conv_bn_layer + """ + if kwargs['filter_size'] == 1: + return self.conv_bn_layer_ori(**kwargs) + else: + return self.conv_bn_layer_ac(**kwargs) + + # conv bn+relu + def conv_bn_layer_ori(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + """ + standard convbn + used for 1x1 convbn in acnet + """ + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) + + # conv bn+relu + def conv_bn_layer_ac(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + """ ACNet conv bn """ + padding = (filter_size - 1) // 2 + + square_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + 
filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + act=act if self.deploy else None, + param_attr=ParamAttr(name=name + "_acsquare_weights"), + bias_attr=ParamAttr(name=name + "_acsquare_bias") + if self.deploy else False, + name=name + '.acsquare.conv2d.output.1') + + if self.deploy: + return square_conv + else: + ver_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=(filter_size, 1), + stride=stride, + padding=(padding, 0), + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_acver_weights"), + bias_attr=False, + name=name + '.acver.conv2d.output.1') + + hor_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=(1, filter_size), + stride=stride, + padding=(0, padding), + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_achor_weights"), + bias_attr=False, + name=name + '.achor.conv2d.output.1') + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + square_bn = fluid.layers.batch_norm( + input=square_conv, + act=None, + name=bn_name + '.acsquare.output.1', + param_attr=ParamAttr(name=bn_name + '_acsquare_scale'), + bias_attr=ParamAttr(bn_name + '_acsquare_offset'), + moving_mean_name=bn_name + '_acsquare_mean', + moving_variance_name=bn_name + '_acsquare_variance', ) + + ver_bn = fluid.layers.batch_norm( + input=ver_conv, + act=None, + name=bn_name + '.acver.output.1', + param_attr=ParamAttr(name=bn_name + '_acver_scale'), + bias_attr=ParamAttr(bn_name + '_acver_offset'), + moving_mean_name=bn_name + '_acver_mean', + moving_variance_name=bn_name + '_acver_variance', ) + + hor_bn = fluid.layers.batch_norm( + input=hor_conv, + act=None, + name=bn_name + '.achor.output.1', + param_attr=ParamAttr(name=bn_name + '_achor_scale'), + bias_attr=ParamAttr(bn_name + '_achor_offset'), + moving_mean_name=bn_name + '_achor_mean', + moving_variance_name=bn_name + '_achor_variance', ) + + return fluid.layers.elementwise_add( + 
x=square_bn, y=ver_bn + hor_bn, act=act)
+
+    def shortcut(self, input, ch_out, stride, is_first, name):
+        """ shortcut """
+        ch_in = input.shape[1]
+        if ch_in != ch_out or stride != 1 or is_first == True:
+            return self.conv_bn_layer(
+                input=input,
+                num_filters=ch_out,
+                filter_size=1,
+                stride=stride,
+                name=name)
+        else:
+            return input
+
+    def bottleneck_block(self, input, num_filters, stride, name):
+        """" bottleneck_block """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            is_first=False,
+            name=name + "_branch1")
+
+        return fluid.layers.elementwise_add(
+            x=short, y=conv2, act='relu', name=name + ".add.output.5")
+
+    def basic_block(self, input, num_filters, stride, is_first, name):
+        """ basic_block """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu',
+            stride=stride,
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+        short = self.shortcut(
+            input, num_filters, stride, is_first, name=name + "_branch1")
+        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+
+
+def ResNet18_ACNet(deploy=False):
+    """ResNet18 + ACNet"""
+    model = ResNetACNet(layers=18, deploy=deploy)
+    return model
+
+
+def ResNet34_ACNet(deploy=False):
+    """ResNet34 + ACNet"""
+    model = ResNetACNet(layers=34, deploy=deploy)
+    return model
+
+
+def ResNet50_ACNet(deploy=False):
+    """ResNet50 + ACNet"""
+    model = ResNetACNet(layers=50, deploy=deploy)
+    return model
+
+
+def 
ResNet101_ACNet(deploy=False): + """ResNet101 + ACNet""" + model = ResNetACNet(layers=101, deploy=deploy) + return model + + +def ResNet152_ACNet(deploy=False): + """ResNet152 + ACNet""" + model = ResNetACNet(layers=152, deploy=deploy) + return model diff --git a/ppcls/modeling/architectures/resnet_vc.py b/ppcls/modeling/architectures/resnet_vc.py new file mode 100644 index 000000000..36e7e5943 --- /dev/null +++ b/ppcls/modeling/architectures/resnet_vc.py @@ -0,0 +1,194 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", "ResNet50_vc", "ResNet101_vc", "ResNet152_vc"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50): + self.params = train_parameters + self.layers = layers + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + conv = self.conv_bn_layer( + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') + conv = self.conv_bn_layer( + input=conv, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') + conv = self.conv_bn_layer( + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) + + pool = 
fluid.layers.pool2d( + input=conv, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name="fc_0.w_0", + initializer=fluid.initializer.Uniform(-stdv, + stdv)), + bias_attr=ParamAttr(name="fc_0.b_0")) + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, num_filters * 4, stride, name=name + "_branch1") + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") + + +def ResNet50_vc(): + model = 
ResNet(layers=50) + return model + + +def ResNet101_vc(): + model = ResNet(layers=101) + return model + + +def ResNet152_vc(): + model = ResNet(layers=152) + return model diff --git a/ppcls/modeling/architectures/resnet_vd.py b/ppcls/modeling/architectures/resnet_vd.py new file mode 100644 index 000000000..8a9f99eac --- /dev/null +++ b/ppcls/modeling/architectures/resnet_vd.py @@ -0,0 +1,293 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", + "ResNet152_vd", "ResNet200_vd" +] + + +class ResNet(): + def __init__(self, layers=50, is_3x3=False): + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + if is_3x3 == False: + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + else: + conv = self.conv_bn_layer( + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') + conv = self.conv_bn_layer( + input=conv, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') + conv = self.conv_bn_layer( + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + 
num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + name=conv_name) + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name="fc_0.w_0", + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_0.b_0")) + + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d( + input=input, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg', + ceil_mode=True) + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + 
else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new( + input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, + num_filters * 4, + stride, + if_first=if_first, + name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + def basic_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + short = self.shortcut( + input, + num_filters, + stride, + if_first=if_first, + name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + +def ResNet18_vd(): + model = ResNet(layers=18, is_3x3=True) + return model + + +def ResNet34_vd(): + model = ResNet(layers=34, is_3x3=True) + 
return model + + +def ResNet50_vd(): + model = ResNet(layers=50, is_3x3=True) + return model + + +def ResNet101_vd(): + model = ResNet(layers=101, is_3x3=True) + return model + + +def ResNet152_vd(): + model = ResNet(layers=152, is_3x3=True) + return model + + +def ResNet200_vd(): + model = ResNet(layers=200, is_3x3=True) + return model diff --git a/ppcls/modeling/architectures/resnext.py b/ppcls/modeling/architectures/resnext.py new file mode 100644 index 000000000..2a5f46150 --- /dev/null +++ b/ppcls/modeling/architectures/resnext.py @@ -0,0 +1,195 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d", + "ResNeXt50_32x4d", "ResNeXt101_32x4d", "ResNeXt152_32x4d" +] + + +class ResNeXt(): + def __init__(self, layers=50, cardinality=64): + self.layers = layers + self.cardinality = cardinality + + def net(self, input, class_dim=1000): + layers = self.layers + cardinality = self.cardinality + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + + num_filters1 = [256, 512, 1024, 2048] + num_filters2 = [128, 256, 512, 1024] + + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="res_conv1") #debug + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters1[block] + if cardinality == 64 else num_filters2[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, 
stdv), + name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, cardinality, name): + cardinality = self.cardinality + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters if cardinality == 64 else num_filters * 2, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, + num_filters if cardinality == 64 else num_filters * 2, + stride, + name=name + "_branch1") + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") + + +def ResNeXt50_64x4d(): + model = ResNeXt(layers=50, cardinality=64) + return model + + +def ResNeXt50_32x4d(): + model = 
ResNeXt(layers=50, cardinality=32) + return model + + +def ResNeXt101_64x4d(): + model = ResNeXt(layers=101, cardinality=64) + return model + + +def ResNeXt101_32x4d(): + model = ResNeXt(layers=101, cardinality=32) + return model + + +def ResNeXt152_64x4d(): + model = ResNeXt(layers=152, cardinality=64) + return model + + +def ResNeXt152_32x4d(): + model = ResNeXt(layers=152, cardinality=32) + return model diff --git a/ppcls/modeling/architectures/resnext101_wsl.py b/ppcls/modeling/architectures/resnext101_wsl.py new file mode 100644 index 000000000..ba27c671e --- /dev/null +++ b/ppcls/modeling/architectures/resnext101_wsl.py @@ -0,0 +1,182 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
__all__ = [
    "ResNeXt101_32x8d_wsl", "ResNeXt101_32x16d_wsl", "ResNeXt101_32x32d_wsl",
    "ResNeXt101_32x48d_wsl", "Fix_ResNeXt101_32x48d_wsl"
]


class ResNeXt101_wsl():
    """ResNeXt-101 backbone in the WSL (weakly-supervised pretraining) layout.

    Parameter names follow the torch-style checkpoint convention
    (``layer1.0.conv1.weight``, ``layer1.0.bn1.running_mean``, ...) so released
    WSL weights map onto this graph one-to-one.  Only the 101-layer depth
    table is implemented; ``cardinality`` and ``width`` select the 32x8d /
    32x16d / 32x32d / 32x48d variants.
    """

    def __init__(self, layers=101, cardinality=32, width=48):
        self.layers = layers
        self.cardinality = cardinality
        self.width = width

    def net(self, input, class_dim=1000):
        """Build the network over `input` and return the `class_dim`-way FC output."""
        layers = self.layers
        # The depth table below is hard-coded for ResNeXt-101.  Fail fast on
        # any other value instead of silently building a mislabeled network;
        # sibling architectures in this package assert supported layers too.
        assert layers == 101, \
            "ResNeXt101_wsl only supports layers=101 but got {}".format(layers)
        cardinality = self.cardinality
        width = self.width

        depth = [3, 4, 23, 3]
        base_width = cardinality * width
        # Stage output widths: base, 2x, 4x, 8x.
        num_filters = [base_width * i for i in [1, 2, 4, 8]]

        conv = self.conv_bn_layer(
            input=input,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu',
            name="conv1")
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        for block in range(len(depth)):
            for i in range(depth[block]):
                # torch-style block tag, e.g. "layer1.0".
                conv_name = 'layer' + str(block + 1) + "." + str(i)
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[block],
                    # First block of every stage after the first downsamples.
                    stride=2 if i == 0 and block != 0 else 1,
                    cardinality=cardinality,
                    name=conv_name)

        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc.weight'),
            bias_attr=fluid.param_attr.ParamAttr(name='fc.bias'))
        return out

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (no bias) followed by batch_norm, with WSL parameter names.

        `name` is the torch-style module path (e.g. "layer1.0.conv2" or
        "layer1.0.downsample"); the conv/bn parameter names are derived from it.
        """
        # Downsample convs sit at index 0 of a torch Sequential, hence ".0".
        if "downsample" in name:
            conv_name = name + '.0'
        else:
            conv_name = name
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=conv_name + ".weight"),
            bias_attr=False)
        if "downsample" in name:
            # NOTE(review): name[:9] assumes a single-digit block index
            # ("layerN.0."); holds because the projection shortcut only occurs
            # at block 0 of each stage — confirm if that invariant changes.
            bn_name = name[:9] + 'downsample' + '.1'
        else:
            if "conv1" == name:
                bn_name = 'bn' + name[-1]
            else:
                # "layer1.0.conv2" -> "layer1.0." + "bn2"; the slice width
                # depends on whether the block index has one or two digits.
                bn_name = (name[:10] if name[7:9].isdigit() else name[:9]
                           ) + 'bn' + name[-1]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '.weight'),
            bias_attr=ParamAttr(bn_name + '.bias'),
            moving_mean_name=bn_name + '.running_mean',
            moving_variance_name=bn_name + '.running_var', )

    def shortcut(self, input, ch_out, stride, name):
        """Identity when shapes match, else a 1x1 projection conv+bn."""
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, cardinality, name):
        """1x1 reduce -> 3x3 grouped conv -> 1x1 expand, with residual add."""
        cardinality = self.cardinality
        width = self.width
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + ".conv1")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            groups=cardinality,
            act='relu',
            name=name + ".conv2")
        # Output channels scale with the bottleneck width ratio.
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters // (width // 8),
            filter_size=1,
            act=None,
            name=name + ".conv3")

        short = self.shortcut(
            input,
            num_filters // (width // 8),
            stride,
            name=name + ".downsample")

        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


def ResNeXt101_32x8d_wsl():
    """ResNeXt-101 32x8d WSL variant."""
    model = ResNeXt101_wsl(cardinality=32, width=8)
    return model


def ResNeXt101_32x16d_wsl():
    """ResNeXt-101 32x16d WSL variant."""
    model = ResNeXt101_wsl(cardinality=32, width=16)
    return model


def ResNeXt101_32x32d_wsl():
    """ResNeXt-101 32x32d WSL variant."""
    model = ResNeXt101_wsl(cardinality=32, width=32)
    return model


def ResNeXt101_32x48d_wsl():
    """ResNeXt-101 32x48d WSL variant."""
    model = ResNeXt101_wsl(cardinality=32, width=48)
    return model


def Fix_ResNeXt101_32x48d_wsl():
    """Same topology as ResNeXt101_32x48d_wsl; presumably the 'Fix' variant
    differs only in evaluation input resolution — confirm against configs."""
    model = ResNeXt101_wsl(cardinality=32, width=48)
    return model
__all__ = [
    "ResNeXt", "ResNeXt50_vd_64x4d", "ResNeXt101_vd_64x4d",
    "ResNeXt152_vd_64x4d", "ResNeXt50_vd_32x4d", "ResNeXt101_vd_32x4d",
    "ResNeXt152_vd_32x4d"
]


class ResNeXt():
    """ResNeXt-vd backbone.

    The "vd" tricks: an optional three-conv 3x3 stem (``is_3x3``) and an
    avg-pool-then-conv projection shortcut for non-first stages.
    """

    def __init__(self, layers=50, is_3x3=False, cardinality=64):
        self.layers = layers
        self.is_3x3 = is_3x3
        self.cardinality = cardinality

    def net(self, input, class_dim=1000):
        """Assemble the graph over `input`; returns the final FC output."""
        layers = self.layers
        cardinality = self.cardinality
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Residual-block counts per stage for each supported depth.
        depth = {
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }[layers]
        num_filters1 = [256, 512, 1024, 2048]   # 64x4d widths
        num_filters2 = [128, 256, 512, 1024]    # 32x4d widths

        if self.is_3x3:
            # "vd" stem: three stacked 3x3 convs replacing the single 7x7.
            conv = self.conv_bn_layer(
                input=input, num_filters=32, filter_size=3, stride=2,
                act='relu', name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv, num_filters=32, filter_size=3, stride=1,
                act='relu', name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv, num_filters=64, filter_size=3, stride=1,
                act='relu', name='conv1_3')
        else:
            # NOTE(review): this branch passes no `name`, and conv_bn_layer
            # concatenates name + "_weights" — verify before using is_3x3=False.
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=2,
                act='relu')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        for stage in range(len(depth)):
            for blk in range(depth[stage]):
                # Caffe-style block tags; stage 2 of deep nets uses "b<i>".
                if layers in [101, 152, 200] and stage == 2:
                    tag = "res" + str(stage + 2) + (
                        "a" if blk == 0 else "b" + str(blk))
                else:
                    tag = "res" + str(stage + 2) + chr(97 + blk)
                width_table = num_filters1 if cardinality == 64 else num_filters2
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=width_table[stage],
                    stride=2 if blk == 0 and stage != 0 else 1,
                    cardinality=cardinality,
                    if_first=stage == 0,
                    name=tag)

        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc_weights'),
            bias_attr=fluid.param_attr.ParamAttr(name='fc_offset'))

        return out

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (bias-free) + batch_norm with Caffe-style parameter names."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        """"vd" projection: 2x2 avg-pool (ceil mode) then a stride-1 conv+bn."""
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        """Identity when shapes match; otherwise a projection — plain conv for
        the first stage, pooled "vd" conv for the rest."""
        ch_in = input.shape[1]
        if ch_in == ch_out and stride == 1:
            return input
        if if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)

    def bottleneck_block(self, input, num_filters, stride, cardinality, name,
                         if_first):
        """Grouped bottleneck (1x1 -> grouped 3x3 -> 1x1) with residual add."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            groups=cardinality,
            name=name + "_branch2b")
        # 64x4d keeps the width; 32x4d doubles it on the expand conv.
        out_ch = num_filters if cardinality == 64 else num_filters * 2
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=out_ch,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            out_ch,
            stride,
            if_first=if_first,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


def ResNeXt50_vd_64x4d():
    """50-layer, cardinality 64."""
    return ResNeXt(layers=50, is_3x3=True)


def ResNeXt50_vd_32x4d():
    """50-layer, cardinality 32."""
    return ResNeXt(layers=50, cardinality=32, is_3x3=True)


def ResNeXt101_vd_64x4d():
    """101-layer, cardinality 64."""
    return ResNeXt(layers=101, is_3x3=True)


def ResNeXt101_vd_32x4d():
    """101-layer, cardinality 32."""
    return ResNeXt(layers=101, cardinality=32, is_3x3=True)


def ResNeXt152_vd_64x4d():
    """152-layer, cardinality 64."""
    return ResNeXt(layers=152, is_3x3=True)


def ResNeXt152_vd_32x4d():
    """152-layer, cardinality 32."""
    return ResNeXt(layers=152, cardinality=32, is_3x3=True)
__all__ = [
    "SE_ResNet_vd", "SE_ResNet18_vd", "SE_ResNet34_vd", "SE_ResNet50_vd",
    "SE_ResNet101_vd", "SE_ResNet152_vd", "SE_ResNet200_vd"
]


class SE_ResNet_vd():
    """SE-ResNet-vd: ResNet-vd backbone with squeeze-and-excitation blocks.

    Depths 18/34 use basic blocks; 50 and deeper use bottleneck blocks.
    """

    def __init__(self, layers=50, is_3x3=False):
        self.layers = layers
        self.is_3x3 = is_3x3

    def net(self, input, class_dim=1000):
        """Assemble the graph over `input`; returns the final FC output."""
        layers = self.layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Residual-block counts per stage for each supported depth.
        depth = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            200: [3, 12, 48, 3],
        }[layers]
        num_filters = [64, 128, 256, 512]
        reduction_ratio = 16

        if self.is_3x3:
            # "vd" stem: three stacked 3x3 convs replacing the single 7x7.
            conv = self.conv_bn_layer(
                input=input, num_filters=32, filter_size=3, stride=2,
                act='relu', name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv, num_filters=32, filter_size=3, stride=1,
                act='relu', name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv, num_filters=64, filter_size=3, stride=1,
                act='relu', name='conv1_3')
        else:
            # NOTE(review): this branch passes no `name`, and conv_bn_layer
            # concatenates name + "_weights" — verify before using is_3x3=False
            # (every factory below sets is_3x3=True).
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=2,
                act='relu')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        use_bottleneck = layers >= 50
        for stage in range(len(depth)):
            for idx in range(depth[stage]):
                if use_bottleneck:
                    # Caffe-style tags; stage 2 of deep nets uses "b<i>".
                    if layers in [101, 152, 200] and stage == 2:
                        tag = "res" + str(stage + 2) + (
                            "a" if idx == 0 else "b" + str(idx))
                    else:
                        tag = "res" + str(stage + 2) + chr(97 + idx)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[stage],
                        stride=2 if idx == 0 and stage != 0 else 1,
                        if_first=stage == idx == 0,
                        reduction_ratio=reduction_ratio,
                        name=tag)
                else:
                    tag = "res" + str(stage + 2) + chr(97 + idx)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[stage],
                        stride=2 if idx == 0 and stage != 0 else 1,
                        if_first=stage == idx == 0,
                        reduction_ratio=reduction_ratio,
                        name=tag)

        pool = fluid.layers.pool2d(
            input=conv, pool_size=7, pool_type='avg', global_pooling=True)

        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc6_weights'),
            bias_attr=ParamAttr(name='fc6_offset'))

        return out

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (bias-free) + batch_norm with Caffe-style parameter names."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        """"vd" projection: 2x2 avg-pool (ceil mode) then a stride-1 conv+bn."""
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        """Residual branch: the very first block always projects with a plain
        conv; later blocks project (via the pooled "vd" conv) only on a
        shape/stride mismatch, else pass the input through."""
        ch_in = input.shape[1]
        if if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        if ch_in != ch_out or stride != 1:
            return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)
        return input

    def bottleneck_block(self, input, num_filters, stride, name, if_first,
                         reduction_ratio):
        """1x1 -> 3x3 -> 1x1 (4x expand) bottleneck with an SE gate."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")
        scale = self.squeeze_excitation(
            input=conv2,
            num_channels=num_filters * 4,
            reduction_ratio=reduction_ratio,
            name='fc_' + name)

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            if_first=if_first,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(x=short, y=scale, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first,
                    reduction_ratio):
        """Two 3x3 convs with an SE gate (used for depths 18 and 34)."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        scale = self.squeeze_excitation(
            input=conv1,
            num_channels=num_filters,
            reduction_ratio=reduction_ratio,
            name='fc_' + name)
        short = self.shortcut(
            input,
            num_filters,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=scale, act='relu')

    def squeeze_excitation(self,
                           input,
                           num_channels,
                           reduction_ratio,
                           name=None):
        """SE gate: global-avg-pool -> FC(reduce, relu) -> FC(expand, sigmoid),
        then channel-wise rescale of `input`."""
        # pool_size=0 kept as in the original; presumably ignored when
        # global_pooling=True — confirm against the pool2d API.
        pool = fluid.layers.pool2d(
            input=input, pool_size=0, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(
            input=pool,
            size=num_channels // reduction_ratio,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_sqz_weights'),
            bias_attr=ParamAttr(name=name + '_sqz_offset'))
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(
            input=squeeze,
            size=num_channels,
            act='sigmoid',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_exc_weights'),
            bias_attr=ParamAttr(name=name + '_exc_offset'))
        return fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)


def SE_ResNet18_vd():
    """18-layer SE-ResNet-vd."""
    return SE_ResNet_vd(layers=18, is_3x3=True)


def SE_ResNet34_vd():
    """34-layer SE-ResNet-vd."""
    return SE_ResNet_vd(layers=34, is_3x3=True)


def SE_ResNet50_vd():
    """50-layer SE-ResNet-vd."""
    return SE_ResNet_vd(layers=50, is_3x3=True)


def SE_ResNet101_vd():
    """101-layer SE-ResNet-vd."""
    return SE_ResNet_vd(layers=101, is_3x3=True)


def SE_ResNet152_vd():
    """152-layer SE-ResNet-vd."""
    return SE_ResNet_vd(layers=152, is_3x3=True)


def SE_ResNet200_vd():
    """200-layer SE-ResNet-vd."""
    return SE_ResNet_vd(layers=200, is_3x3=True)
__all__ = [
    "SE_ResNeXt", "SE_ResNeXt50_32x4d", "SE_ResNeXt101_32x4d",
    "SE_ResNeXt152_32x4d"
]


class SE_ResNeXt():
    """SE-ResNeXt backbone: grouped bottleneck blocks gated by
    squeeze-and-excitation, plus dropout before the classifier FC."""

    def __init__(self, layers=50):
        self.layers = layers

    def net(self, input, class_dim=1000):
        """Assemble the graph over `input`; returns the final FC output."""
        layers = self.layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        # Shared across all supported depths.
        reduction_ratio = 16
        num_filters = [128, 256, 512, 1024]
        if layers == 50:
            cardinality = 32
            depth = [3, 4, 6, 3]
        elif layers == 101:
            cardinality = 32
            depth = [3, 4, 23, 3]
        else:  # layers == 152
            cardinality = 64
            depth = [3, 8, 36, 3]

        if layers == 152:
            # The deepest variant uses a three-conv stem.
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=3, stride=2,
                act='relu', name='conv1')
            conv = self.conv_bn_layer(
                input=conv, num_filters=64, filter_size=3, stride=1,
                act='relu', name='conv2')
            conv = self.conv_bn_layer(
                input=conv, num_filters=128, filter_size=3, stride=1,
                act='relu', name='conv3')
        else:
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=2,
                act='relu', name='conv1')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max',
            use_cudnn=False)

        # Stage numbering continues after the stem convs.
        n = 3 if layers == 152 else 1
        for stage in range(len(depth)):
            n += 1
            for idx in range(depth[stage]):
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[stage],
                    stride=2 if idx == 0 and stage != 0 else 1,
                    cardinality=cardinality,
                    reduction_ratio=reduction_ratio,
                    name=str(n) + '_' + str(idx + 1))

        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
        drop = fluid.layers.dropout(x=pool, dropout_prob=0.5)
        stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=drop,
            size=class_dim,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc6_weights'),
            bias_attr=ParamAttr(name='fc6_offset'))
        return out

    def shortcut(self, input, ch_out, stride, name):
        """Identity when shapes match, else a 1x1 projection conv+bn."""
        ch_in = input.shape[1]
        if ch_in == ch_out and stride == 1:
            return input
        return self.conv_bn_layer(
            input, ch_out, 1, stride, name='conv' + name + '_prj')

    def bottleneck_block(self,
                         input,
                         num_filters,
                         stride,
                         cardinality,
                         reduction_ratio,
                         name=None):
        """Grouped bottleneck (1x1 -> grouped 3x3 -> 1x1, 2x expand) with an
        SE gate and residual add."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name='conv' + name + '_x1')
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            groups=cardinality,
            act='relu',
            name='conv' + name + '_x2')
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 2,
            filter_size=1,
            act=None,
            name='conv' + name + '_x3')
        scale = self.squeeze_excitation(
            input=conv2,
            num_channels=num_filters * 2,
            reduction_ratio=reduction_ratio,
            name='fc' + name)

        short = self.shortcut(input, num_filters * 2, stride, name=name)

        return fluid.layers.elementwise_add(x=short, y=scale, act='relu')

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (bias-free) followed by batch_norm."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + '_weights'))
        bn_name = name + "_bn"
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def squeeze_excitation(self,
                           input,
                           num_channels,
                           reduction_ratio,
                           name=None):
        """SE gate: global-avg-pool -> FC(reduce, relu) -> FC(expand, sigmoid),
        then channel-wise rescale of `input`."""
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(
            input=pool,
            size=num_channels // reduction_ratio,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_sqz_weights'),
            bias_attr=ParamAttr(name=name + '_sqz_offset'))
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(
            input=squeeze,
            size=num_channels,
            act='sigmoid',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_exc_weights'),
            bias_attr=ParamAttr(name=name + '_exc_offset'))
        return fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)


def SE_ResNeXt50_32x4d():
    """50-layer SE-ResNeXt, cardinality 32."""
    return SE_ResNeXt(layers=50)


def SE_ResNeXt101_32x4d():
    """101-layer SE-ResNeXt, cardinality 32."""
    return SE_ResNeXt(layers=101)


def SE_ResNeXt152_32x4d():
    """152-layer SE-ResNeXt (cardinality 64 internally, per depth table)."""
    return SE_ResNeXt(layers=152)
# __all__ previously listed "SE_ResNeXt50_32x4d_vd" / "SE_ResNeXt101_32x4d_vd",
# which do not exist (the functions are named *_vd_32x4d) — a star-import
# would raise AttributeError.  Fixed to match the defined names.
__all__ = [
    "SE_ResNeXt_vd", "SE_ResNeXt50_vd_32x4d", "SE_ResNeXt101_vd_32x4d",
    "SENet154_vd"
]


class SE_ResNeXt_vd():
    """SE-ResNeXt-vd backbone: grouped SE bottleneck blocks with the "vd"
    three-conv stem and pooled projection shortcuts."""

    def __init__(self, layers=50):
        self.layers = layers

    def net(self, input, class_dim=1000):
        """Assemble the graph over `input`; returns the final FC output."""
        layers = self.layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        reduction_ratio = 16
        if layers == 50:
            cardinality = 32
            depth = [3, 4, 6, 3]
            num_filters = [128, 256, 512, 1024]
        elif layers == 101:
            cardinality = 32
            depth = [3, 4, 23, 3]
            num_filters = [128, 256, 512, 1024]
        else:  # layers == 152
            cardinality = 64
            depth = [3, 8, 36, 3]
            num_filters = [256, 512, 1024, 2048]

        # The "vd" stem is identical for every supported depth, so it is
        # built once instead of being repeated inside each depth branch.
        conv = self.conv_bn_layer(
            input=input,
            num_filters=64,
            filter_size=3,
            stride=2,
            act='relu',
            name='conv1_1')
        conv = self.conv_bn_layer(
            input=conv,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name='conv1_2')
        conv = self.conv_bn_layer(
            input=conv,
            num_filters=128,
            filter_size=3,
            stride=1,
            act='relu',
            name='conv1_3')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        # Stage numbering continues after the stem convs.
        n = 1 if layers == 50 or layers == 101 else 3
        for block in range(len(depth)):
            n += 1
            for i in range(depth[block]):
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1,
                    cardinality=cardinality,
                    reduction_ratio=reduction_ratio,
                    if_first=block == 0,
                    name=str(n) + '_' + str(i + 1))

        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True)
        if layers == 152:
            # Only the deepest variant regularizes the pooled features.
            pool = fluid.layers.dropout(x=pool, dropout_prob=0.2)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name='fc6_weights'),
            bias_attr=ParamAttr(name='fc6_offset'))

        return out

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        """Identity when shapes match; otherwise a 1x1 projection — a plain
        conv for the first stage, the pooled "vd" conv for later stages."""
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            filter_size = 1
            if if_first:
                return self.conv_bn_layer(
                    input,
                    ch_out,
                    filter_size,
                    stride,
                    name='conv' + name + '_prj')
            else:
                return self.conv_bn_layer_new(
                    input,
                    ch_out,
                    filter_size,
                    stride,
                    name='conv' + name + '_prj')
        else:
            return input

    def bottleneck_block(self,
                         input,
                         num_filters,
                         stride,
                         cardinality,
                         reduction_ratio,
                         if_first,
                         name=None):
        """Grouped bottleneck with SE gate and residual add."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name='conv' + name + '_x1')
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            groups=cardinality,
            act='relu',
            name='conv' + name + '_x2')
        # Cardinality-64 (152-layer) halves the width before the 2x expand.
        if cardinality == 64:
            num_filters = num_filters // 2
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 2,
            filter_size=1,
            act=None,
            name='conv' + name + '_x3')
        scale = self.squeeze_excitation(
            input=conv2,
            num_channels=num_filters * 2,
            reduction_ratio=reduction_ratio,
            name='fc' + name)

        short = self.shortcut(
            input, num_filters * 2, stride, if_first=if_first, name=name)

        return fluid.layers.elementwise_add(x=short, y=scale, act='relu')

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (bias-free) followed by batch_norm."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + '_weights'))
        bn_name = name + "_bn"
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        """"vd" projection: 2x2 avg-pool (ceil mode) then a stride-1 conv+bn."""
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = name + "_bn"
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def squeeze_excitation(self,
                           input,
                           num_channels,
                           reduction_ratio,
                           name=None):
        """SE gate: global-avg-pool -> FC(reduce, relu) -> FC(expand, sigmoid),
        then channel-wise rescale of `input`."""
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(
            input=pool,
            size=num_channels // reduction_ratio,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_sqz_weights'),
            bias_attr=ParamAttr(name=name + '_sqz_offset'))
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(
            input=squeeze,
            size=num_channels,
            act='sigmoid',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_exc_weights'),
            bias_attr=ParamAttr(name=name + '_exc_offset'))
        return fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)


def SE_ResNeXt50_vd_32x4d():
    """50-layer SE-ResNeXt-vd, cardinality 32."""
    model = SE_ResNeXt_vd(layers=50)
    return model


def SE_ResNeXt101_vd_32x4d():
    """101-layer SE-ResNeXt-vd, cardinality 32."""
    model = SE_ResNeXt_vd(layers=101)
    return model


def SENet154_vd():
    """SENet-154-vd (the 152-layer, cardinality-64 configuration)."""
    model = SE_ResNeXt_vd(layers=152)
    return model
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + 'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', + 'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0', + 'ShuffleNetV2' +] + + +class ShuffleNetV2(): + def __init__(self, scale=1.0): + self.scale = scale + + def net(self, input, class_dim=1000): + scale = self.scale + stage_repeats = [4, 8, 4] + + if scale == 0.25: + stage_out_channels = [-1, 24, 24, 48, 96, 512] + elif scale == 0.33: + stage_out_channels = [-1, 24, 32, 64, 128, 512] + elif scale == 0.5: + stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif scale == 1.0: + stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif scale == 1.5: + stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif scale == 2.0: + stage_out_channels = [-1, 24, 224, 488, 976, 2048] + else: + raise NotImplementedError("This scale size:[" + str(scale) + + "] is not implemented!") + #conv1 + + input_channel = stage_out_channels[1] + conv1 = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=input_channel, + padding=1, + stride=2, + name='stage1_conv') + pool1 = fluid.layers.pool2d( + input=conv1, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + conv = pool1 + # 
bottleneck sequences + for idxstage in range(len(stage_repeats)): + numrepeat = stage_repeats[idxstage] + output_channel = stage_out_channels[idxstage + 2] + for i in range(numrepeat): + if i == 0: + conv = self.inverted_residual_unit( + input=conv, + num_filters=output_channel, + stride=2, + benchmodel=2, + name=str(idxstage + 2) + '_' + str(i + 1)) + else: + conv = self.inverted_residual_unit( + input=conv, + num_filters=output_channel, + stride=1, + benchmodel=1, + name=str(idxstage + 2) + '_' + str(i + 1)) + + conv_last = self.conv_bn_layer( + input=conv, + filter_size=1, + num_filters=stage_out_channels[-1], + padding=0, + stride=1, + name='conv5') + pool_last = fluid.layers.pool2d( + input=conv_last, + pool_size=7, + pool_stride=1, + pool_padding=0, + pool_type='avg') + + output = fluid.layers.fc(input=pool_last, + size=class_dim, + param_attr=ParamAttr( + initializer=MSRA(), name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + return output + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + use_cudnn=True, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + out = int((input.shape[2] - 1) / float(stride) + 1) + bn_name = name + '_bn' + if if_act: + return fluid.layers.batch_norm( + input=conv, + act='relu', + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + else: + return fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def 
channel_shuffle(self, x, groups): + batchsize, num_channels, height, width = x.shape[0], x.shape[ + 1], x.shape[2], x.shape[3] + channels_per_group = num_channels // groups + + # reshape + x = fluid.layers.reshape( + x=x, shape=[batchsize, groups, channels_per_group, height, width]) + + x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4]) + + # flatten + x = fluid.layers.reshape( + x=x, shape=[batchsize, num_channels, height, width]) + + return x + + def inverted_residual_unit(self, + input, + num_filters, + stride, + benchmodel, + name=None): + assert stride in [1, 2], \ + "supported stride are {} but your stride is {}".format([1,2], stride) + + oup_inc = num_filters // 2 + inp = input.shape[1] + + if benchmodel == 1: + x1, x2 = fluid.layers.split( + input, + num_or_sections=[input.shape[1] // 2, input.shape[1] // 2], + dim=1) + + conv_pw = self.conv_bn_layer( + input=x2, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name='stage_' + name + '_conv1') + + conv_dw = self.conv_bn_layer( + input=conv_pw, + num_filters=oup_inc, + filter_size=3, + stride=stride, + padding=1, + num_groups=oup_inc, + if_act=False, + use_cudnn=False, + name='stage_' + name + '_conv2') + + conv_linear = self.conv_bn_layer( + input=conv_dw, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name='stage_' + name + '_conv3') + + out = fluid.layers.concat([x1, conv_linear], axis=1) + + else: + #branch1 + conv_dw_1 = self.conv_bn_layer( + input=input, + num_filters=inp, + filter_size=3, + stride=stride, + padding=1, + num_groups=inp, + if_act=False, + use_cudnn=False, + name='stage_' + name + '_conv4') + + conv_linear_1 = self.conv_bn_layer( + input=conv_dw_1, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name='stage_' + name + '_conv5') + + #branch2 + conv_pw_2 = self.conv_bn_layer( + input=input, + num_filters=oup_inc, + filter_size=1, + 
stride=1, + padding=0, + num_groups=1, + if_act=True, + name='stage_' + name + '_conv1') + + conv_dw_2 = self.conv_bn_layer( + input=conv_pw_2, + num_filters=oup_inc, + filter_size=3, + stride=stride, + padding=1, + num_groups=oup_inc, + if_act=False, + use_cudnn=False, + name='stage_' + name + '_conv2') + + conv_linear_2 = self.conv_bn_layer( + input=conv_dw_2, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name='stage_' + name + '_conv3') + out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) + + return self.channel_shuffle(out, 2) + + +def ShuffleNetV2_x0_25(): + model = ShuffleNetV2(scale=0.25) + return model + + +def ShuffleNetV2_x0_33(): + model = ShuffleNetV2(scale=0.33) + return model + + +def ShuffleNetV2_x0_5(): + model = ShuffleNetV2(scale=0.5) + return model + + +def ShuffleNetV2_x1_0(): + model = ShuffleNetV2(scale=1.0) + return model + + +def ShuffleNetV2_x1_5(): + model = ShuffleNetV2(scale=1.5) + return model + + +def ShuffleNetV2_x2_0(): + model = ShuffleNetV2(scale=2.0) + return model diff --git a/ppcls/modeling/architectures/shufflenet_v2_swish.py b/ppcls/modeling/architectures/shufflenet_v2_swish.py new file mode 100644 index 000000000..8683dfc08 --- /dev/null +++ b/ppcls/modeling/architectures/shufflenet_v2_swish.py @@ -0,0 +1,293 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    'ShuffleNetV2_x0_5_swish', 'ShuffleNetV2_x1_0_swish',
    'ShuffleNetV2_x1_5_swish', 'ShuffleNetV2_x2_0_swish', 'ShuffleNetV2_swish'
]


class ShuffleNetV2_swish():
    """ShuffleNet V2 backbone whose conv-bn blocks use the swish activation.

    Args:
        scale: channel-width multiplier; one of 0.5, 1.0, 1.5, 2.0.
    """

    def __init__(self, scale=1.0):
        self.scale = scale

    def net(self, input, class_dim=1000):
        """Build the network on `input` and return `class_dim`-way FC logits.

        Raises:
            ValueError: if ``self.scale`` is not one of the supported widths.
        """
        scale = self.scale
        stage_repeats = [4, 8, 4]

        if scale == 0.5:
            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
        elif scale == 1.0:
            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
        elif scale == 1.5:
            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
        elif scale == 2.0:
            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
        else:
            # BUGFIX: the original message interpolated the undefined name
            # ``num_groups`` and therefore raised NameError instead of the
            # intended ValueError.
            raise ValueError(
                "scale {} is not supported for ShuffleNetV2_swish; "
                "choose one of 0.5, 1.0, 1.5, 2.0".format(scale))

        # conv1 stem + 3x3/s2 max pool
        input_channel = stage_out_channels[1]
        conv1 = self.conv_bn_layer(
            input=input,
            filter_size=3,
            num_filters=input_channel,
            padding=1,
            stride=2,
            name='stage1_conv')
        pool1 = fluid.layers.pool2d(
            input=conv1,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
        conv = pool1
        # bottleneck sequences: the first unit of each stage downsamples
        # (benchmodel=2), the rest keep resolution (benchmodel=1)
        for idxstage in range(len(stage_repeats)):
            numrepeat = stage_repeats[idxstage]
            output_channel = stage_out_channels[idxstage + 2]
            for i in range(numrepeat):
                if i == 0:
                    conv = self.inverted_residual_unit(
                        input=conv,
                        num_filters=output_channel,
                        stride=2,
                        benchmodel=2,
                        name=str(idxstage + 2) + '_' + str(i + 1))
                else:
                    conv = self.inverted_residual_unit(
                        input=conv,
                        num_filters=output_channel,
                        stride=1,
                        benchmodel=1,
                        name=str(idxstage + 2) + '_' + str(i + 1))

        conv_last = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=stage_out_channels[-1],
            padding=0,
            stride=1,
            name='conv5')
        pool_last = fluid.layers.pool2d(
            input=conv_last,
            pool_size=7,
            pool_stride=1,
            pool_padding=0,
            pool_type='avg')

        output = fluid.layers.fc(input=pool_last,
                                 size=class_dim,
                                 param_attr=ParamAttr(
                                     initializer=MSRA(), name='fc6_weights'),
                                 bias_attr=ParamAttr(name='fc6_offset'))
        return output

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      use_cudnn=True,
                      if_act=True,
                      name=None):
        """Conv2d (no bias) + batch norm; swish activation when ``if_act``."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(
                initializer=MSRA(), name=name + '_weights'),
            bias_attr=False)
        # (removed a dead local that recomputed the output spatial size
        # but was never used)
        bn_name = name + '_bn'
        if if_act:
            return fluid.layers.batch_norm(
                input=conv,
                act='swish',
                param_attr=ParamAttr(name=bn_name + "_scale"),
                bias_attr=ParamAttr(name=bn_name + "_offset"),
                moving_mean_name=bn_name + '_mean',
                moving_variance_name=bn_name + '_variance')
        else:
            return fluid.layers.batch_norm(
                input=conv,
                param_attr=ParamAttr(name=bn_name + "_scale"),
                bias_attr=ParamAttr(name=bn_name + "_offset"),
                moving_mean_name=bn_name + '_mean',
                moving_variance_name=bn_name + '_variance')

    def channel_shuffle(self, x, groups):
        """Interleave channels across ``groups`` (ShuffleNet channel shuffle)."""
        batchsize, num_channels, height, width = x.shape[0], x.shape[
            1], x.shape[2], x.shape[3]
        channels_per_group = num_channels // groups

        # reshape to (N, groups, C/groups, H, W)
        x = fluid.layers.reshape(
            x=x, shape=[batchsize, groups, channels_per_group, height, width])

        # transpose group and channel axes, then flatten back
        x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4])

        x = fluid.layers.reshape(
            x=x, shape=[batchsize, num_channels, height, width])

        return x

    def inverted_residual_unit(self,
                               input,
                               num_filters,
                               stride,
                               benchmodel,
                               name=None):
        """ShuffleNet V2 unit; benchmodel=1 keeps resolution, 2 downsamples."""
        assert stride in [1, 2], \
            "supported stride are {} but your stride is {}".format([1,2], stride)

        oup_inc = num_filters // 2
        inp = input.shape[1]

        if benchmodel == 1:
            # split channels: left half is the identity shortcut
            x1, x2 = fluid.layers.split(
                input,
                num_or_sections=[input.shape[1] // 2, input.shape[1] // 2],
                dim=1)

            conv_pw = self.conv_bn_layer(
                input=x2,
                num_filters=oup_inc,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                if_act=True,
                name='stage_' + name + '_conv1')

            conv_dw = self.conv_bn_layer(
                input=conv_pw,
                num_filters=oup_inc,
                filter_size=3,
                stride=stride,
                padding=1,
                num_groups=oup_inc,
                if_act=False,
                use_cudnn=False,
                name='stage_' + name + '_conv2')

            conv_linear = self.conv_bn_layer(
                input=conv_dw,
                num_filters=oup_inc,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                if_act=True,
                name='stage_' + name + '_conv3')

            out = fluid.layers.concat([x1, conv_linear], axis=1)

        else:
            # branch1: depthwise + pointwise on the raw input
            conv_dw_1 = self.conv_bn_layer(
                input=input,
                num_filters=inp,
                filter_size=3,
                stride=stride,
                padding=1,
                num_groups=inp,
                if_act=False,
                use_cudnn=False,
                name='stage_' + name + '_conv4')

            conv_linear_1 = self.conv_bn_layer(
                input=conv_dw_1,
                num_filters=oup_inc,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                if_act=True,
                name='stage_' + name + '_conv5')

            # branch2: pointwise + depthwise + pointwise
            conv_pw_2 = self.conv_bn_layer(
                input=input,
                num_filters=oup_inc,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                if_act=True,
                name='stage_' + name + '_conv1')

            conv_dw_2 = self.conv_bn_layer(
                input=conv_pw_2,
                num_filters=oup_inc,
                filter_size=3,
                stride=stride,
                padding=1,
                num_groups=oup_inc,
                if_act=False,
                use_cudnn=False,
                name='stage_' + name + '_conv2')

            conv_linear_2 = self.conv_bn_layer(
                input=conv_dw_2,
                num_filters=oup_inc,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                if_act=True,
                name='stage_' + name + '_conv3')
            out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1)

        return self.channel_shuffle(out, 2)


def ShuffleNetV2_x0_5_swish():
    model = ShuffleNetV2_swish(scale=0.5)
    return model


def ShuffleNetV2_x1_0_swish():
    model = ShuffleNetV2_swish(scale=1.0)
    return model


def ShuffleNetV2_x1_5_swish():
    model = ShuffleNetV2_swish(scale=1.5)
    return model


def ShuffleNetV2_x2_0_swish():
    model = ShuffleNetV2_swish(scale=2.0)
    return model
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["SqueezeNet", "SqueezeNet1_0", "SqueezeNet1_1"] + + +class SqueezeNet(): + def __init__(self, version='1.0'): + self.version = version + + def net(self, input, class_dim=1000): + version = self.version + assert version in ['1.0', '1.1'], \ + "supported version are {} but input version is {}".format(['1.0', '1.1'], version) + if version == '1.0': + conv = fluid.layers.conv2d( + input, + num_filters=96, + filter_size=7, + stride=2, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name="conv1_weights"), + bias_attr=ParamAttr(name='conv1_offset')) + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') + conv = self.make_fire(conv, 16, 64, 64, name='fire2') + conv = self.make_fire(conv, 16, 64, 64, name='fire3') + conv = self.make_fire(conv, 32, 128, 128, name='fire4') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') + conv = self.make_fire(conv, 32, 128, 128, name='fire5') + conv = self.make_fire(conv, 48, 192, 192, name='fire6') + conv = self.make_fire(conv, 48, 192, 192, name='fire7') + conv = self.make_fire(conv, 64, 256, 256, name='fire8') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') + conv = self.make_fire(conv, 64, 256, 256, name='fire9') + else: + conv = fluid.layers.conv2d( + input, + num_filters=64, + filter_size=3, + stride=2, + padding=1, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name="conv1_weights"), + bias_attr=ParamAttr(name='conv1_offset')) + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') + conv = self.make_fire(conv, 16, 64, 64, name='fire2') + conv = self.make_fire(conv, 16, 64, 64, name='fire3') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, 
pool_type='max') + conv = self.make_fire(conv, 32, 128, 128, name='fire4') + conv = self.make_fire(conv, 32, 128, 128, name='fire5') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') + conv = self.make_fire(conv, 48, 192, 192, name='fire6') + conv = self.make_fire(conv, 48, 192, 192, name='fire7') + conv = self.make_fire(conv, 64, 256, 256, name='fire8') + conv = self.make_fire(conv, 64, 256, 256, name='fire9') + conv = fluid.layers.dropout(conv, dropout_prob=0.5) + conv = fluid.layers.conv2d( + conv, + num_filters=class_dim, + filter_size=1, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name="conv10_weights"), + bias_attr=ParamAttr(name='conv10_offset')) + conv = fluid.layers.pool2d(conv, pool_type='avg', global_pooling=True) + out = fluid.layers.flatten(conv) + return out + + def make_fire_conv(self, + input, + num_filters, + filter_size, + padding=0, + name=None): + conv = fluid.layers.conv2d( + input, + num_filters=num_filters, + filter_size=filter_size, + padding=padding, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + '_offset')) + return conv + + def make_fire(self, + input, + squeeze_channels, + expand1x1_channels, + expand3x3_channels, + name=None): + conv = self.make_fire_conv( + input, squeeze_channels, 1, name=name + '_squeeze1x1') + conv_path1 = self.make_fire_conv( + conv, expand1x1_channels, 1, name=name + '_expand1x1') + conv_path2 = self.make_fire_conv( + conv, expand3x3_channels, 3, 1, name=name + '_expand3x3') + out = fluid.layers.concat([conv_path1, conv_path2], axis=1) + return out + + +def SqueezeNet1_0(): + model = SqueezeNet(version='1.0') + return model + + +def SqueezeNet1_1(): + model = SqueezeNet(version='1.1') + return model diff --git a/ppcls/modeling/architectures/vgg.py b/ppcls/modeling/architectures/vgg.py new file mode 100644 index 000000000..2c5b77ea6 --- /dev/null +++ b/ppcls/modeling/architectures/vgg.py @@ -0,0 +1,108 @@ 
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid + +__all__ = ["VGGNet", "VGG11", "VGG13", "VGG16", "VGG19"] + + +class VGGNet(): + def __init__(self, layers=16): + self.layers = layers + + def net(self, input, class_dim=1000): + layers = self.layers + vgg_spec = { + 11: ([1, 1, 2, 2, 2]), + 13: ([2, 2, 2, 2, 2]), + 16: ([2, 2, 3, 3, 3]), + 19: ([2, 2, 4, 4, 4]) + } + assert layers in vgg_spec.keys(), \ + "supported layers are {} but input layer is {}".format(vgg_spec.keys(), layers) + + nums = vgg_spec[layers] + conv1 = self.conv_block(input, 64, nums[0], name="conv1_") + conv2 = self.conv_block(conv1, 128, nums[1], name="conv2_") + conv3 = self.conv_block(conv2, 256, nums[2], name="conv3_") + conv4 = self.conv_block(conv3, 512, nums[3], name="conv4_") + conv5 = self.conv_block(conv4, 512, nums[4], name="conv5_") + + fc_dim = 4096 + fc_name = ["fc6", "fc7", "fc8"] + fc1 = fluid.layers.fc( + input=conv5, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + name=fc_name[0] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset")) + fc1 = fluid.layers.dropout(x=fc1, dropout_prob=0.5) + fc2 = fluid.layers.fc( + input=fc1, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + name=fc_name[1] + 
"_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset")) + fc2 = fluid.layers.dropout(x=fc2, dropout_prob=0.5) + out = fluid.layers.fc( + input=fc2, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name=fc_name[2] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset")) + + return out + + def conv_block(self, input, num_filter, groups, name=None): + conv = input + for i in range(groups): + conv = fluid.layers.conv2d( + input=conv, + num_filters=num_filter, + filter_size=3, + stride=1, + padding=1, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + name=name + str(i + 1) + "_weights"), + bias_attr=False) + return fluid.layers.pool2d( + input=conv, pool_size=2, pool_type='max', pool_stride=2) + + +def VGG11(): + model = VGGNet(layers=11) + return model + + +def VGG13(): + model = VGGNet(layers=13) + return model + + +def VGG16(): + model = VGGNet(layers=16) + return model + + +def VGG19(): + model = VGGNet(layers=19) + return model diff --git a/ppcls/modeling/architectures/xception.py b/ppcls/modeling/architectures/xception.py new file mode 100644 index 000000000..2f294791c --- /dev/null +++ b/ppcls/modeling/architectures/xception.py @@ -0,0 +1,281 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import sys + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['Xception', 'Xception41', 'Xception65', 'Xception71'] + + +class Xception(object): + """Xception""" + + def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8): + self.entry_flow_block_num = entry_flow_block_num + self.middle_flow_block_num = middle_flow_block_num + return + + def net(self, input, class_dim=1000): + conv = self.entry_flow(input, self.entry_flow_block_num) + conv = self.middle_flow(conv, self.middle_flow_block_num) + conv = self.exit_flow(conv, class_dim) + + return conv + + def entry_flow(self, input, block_num=3): + '''xception entry_flow''' + name = "entry_flow" + conv = self.conv_bn_layer( + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name=name + "_conv1") + conv = self.conv_bn_layer( + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name=name + "_conv2") + + if block_num == 3: + relu_first = [False, True, True] + num_filters = [128, 256, 728] + stride = [2, 2, 2] + elif block_num == 5: + relu_first = [False, True, True, True, True] + num_filters = [128, 256, 256, 728, 728] + stride = [2, 1, 2, 1, 2] + else: + sys.exit(-1) + + for block in range(block_num): + curr_name = "{}_{}".format(name, block) + conv = self.entry_flow_bottleneck_block( + conv, + num_filters=num_filters[block], + name=curr_name, + stride=stride[block], + relu_first=relu_first[block]) + + return conv + + def entry_flow_bottleneck_block(self, + input, + num_filters, + name, + stride=2, + relu_first=False): + '''entry_flow_bottleneck_block''' + short = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=1, + stride=stride, + padding=0, + act=None, + param_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + + conv0 = input + if 
relu_first: + conv0 = fluid.layers.relu(conv0) + + conv1 = self.separable_conv( + conv0, num_filters, stride=1, name=name + "_branch2a_weights") + + conv2 = fluid.layers.relu(conv1) + conv2 = self.separable_conv( + conv2, num_filters, stride=1, name=name + "_branch2b_weights") + + pool = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=stride, + pool_padding=1, + pool_type='max') + + return fluid.layers.elementwise_add(x=short, y=pool) + + def middle_flow(self, input, block_num=8): + '''xception middle_flow''' + num_filters = 728 + conv = input + for block in range(block_num): + name = "middle_flow_{}".format(block) + conv = self.middle_flow_bottleneck_block(conv, num_filters, name) + + return conv + + def middle_flow_bottleneck_block(self, input, num_filters, name): + '''middle_flow_bottleneck_block''' + conv0 = fluid.layers.relu(input) + conv0 = self.separable_conv( + conv0, + num_filters=num_filters, + stride=1, + name=name + "_branch2a_weights") + + conv1 = fluid.layers.relu(conv0) + conv1 = self.separable_conv( + conv1, + num_filters=num_filters, + stride=1, + name=name + "_branch2b_weights") + + conv2 = fluid.layers.relu(conv1) + conv2 = self.separable_conv( + conv2, + num_filters=num_filters, + stride=1, + name=name + "_branch2c_weights") + + return fluid.layers.elementwise_add(x=input, y=conv2) + + def exit_flow(self, input, class_dim): + '''xception exit flow''' + name = "exit_flow" + num_filters1 = 728 + num_filters2 = 1024 + conv0 = self.exit_flow_bottleneck_block( + input, num_filters1, num_filters2, name=name + "_1") + + conv1 = self.separable_conv( + conv0, num_filters=1536, stride=1, name=name + "_2") + conv1 = fluid.layers.relu(conv1) + + conv2 = self.separable_conv( + conv1, num_filters=2048, stride=1, name=name + "_3") + conv2 = fluid.layers.relu(conv2) + + pool = fluid.layers.pool2d( + input=conv2, pool_type='avg', global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + 
size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + + return out + + def exit_flow_bottleneck_block(self, input, num_filters1, num_filters2, + name): + '''entry_flow_bottleneck_block''' + short = fluid.layers.conv2d( + input=input, + num_filters=num_filters2, + filter_size=1, + stride=2, + padding=0, + act=None, + param_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + + conv0 = fluid.layers.relu(input) + conv1 = self.separable_conv( + conv0, num_filters1, stride=1, name=name + "_branch2a_weights") + + conv2 = fluid.layers.relu(conv1) + conv2 = self.separable_conv( + conv2, num_filters2, stride=1, name=name + "_branch2b_weights") + + pool = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + return fluid.layers.elementwise_add(x=short, y=pool) + + def separable_conv(self, input, num_filters, stride=1, name=None): + """separable_conv""" + pointwise_conv = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + stride=1, + name=name + "_sep") + + depthwise_conv = self.conv_bn_layer( + input=pointwise_conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + groups=num_filters, + use_cudnn=False, + name=name + "_dw") + + return depthwise_conv + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + use_cudnn=True, + name=None): + """conv_bn_layer""" + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + use_cudnn=use_cudnn) + + bn_name = "bn_" + name + + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name 
+ '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + +def Xception41(): + model = Xception(entry_flow_block_num=3, middle_flow_block_num=8) + return model + + +def Xception65(): + model = Xception(entry_flow_block_num=3, middle_flow_block_num=16) + return model + + +def Xception71(): + model = Xception(entry_flow_block_num=5, middle_flow_block_num=16) + return model diff --git a/ppcls/modeling/architectures/xception_deeplab.py b/ppcls/modeling/architectures/xception_deeplab.py new file mode 100644 index 000000000..b76375ed6 --- /dev/null +++ b/ppcls/modeling/architectures/xception_deeplab.py @@ -0,0 +1,320 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
def check_data(data, number):
    """Broadcast an int to a list of length ``number``; a sequence is
    validated to already have that length and returned unchanged."""
    if isinstance(data, int):
        return [data] * number
    assert len(data) == number, \
        "data length {} does not match expected number {}".format(
            len(data), number)
    return data


def check_stride(s, os):
    """Return True while the accumulated stride ``s`` has not exceeded the
    requested output stride ``os`` (i.e. further downsampling is allowed)."""
    return s <= os


def check_points(count, points):
    """Return True when ``count`` matches ``points`` — membership if
    ``points`` is a list, equality otherwise; always False for None."""
    if points is None:
        return False
    if isinstance(points, list):
        return count in points
    return count == points
+ self.output_stride = output_stride + self.decode_points = decode_points + self.short_cuts = dict() + with scope(self.backbone): + # Entry flow + data = self.entry_flow(input) + if check_points(self.block_point, end_points): + return data, self.short_cuts + + # Middle flow + data = self.middle_flow(data) + if check_points(self.block_point, end_points): + return data, self.short_cuts + + # Exit flow + data = self.exit_flow(data) + if check_points(self.block_point, end_points): + return data, self.short_cuts + + data = fluid.layers.reduce_mean(data, [2, 3], keep_dim=True) + data = fluid.layers.dropout(data, 0.5) + stdv = 1.0 / math.sqrt(data.shape[1] * 1.0) + with scope("logit"): + out = fluid.layers.fc( + input=data, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=fluid.param_attr.ParamAttr(name='fc_bias')) + + return out + + def entry_flow(self, data): + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=None, + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.09)) + with scope("entry_flow"): + with scope("conv1"): + data = bn_relu( + conv( + data, + 32, + 3, + stride=2, + padding=1, + param_attr=param_attr)) + with scope("conv2"): + data = bn_relu( + conv( + data, + 64, + 3, + stride=1, + padding=1, + param_attr=param_attr)) + + # get entry flow params + block_num = self.bottleneck_params["entry_flow"][0] + strides = self.bottleneck_params["entry_flow"][1] + chns = self.bottleneck_params["entry_flow"][2] + strides = check_data(strides, block_num) + chns = check_data(chns, block_num) + + # params to control your flow + s = self.stride + block_point = self.block_point + output_stride = self.output_stride + with scope("entry_flow"): + for i in range(block_num): + block_point = block_point + 1 + with scope("block" + str(i + 1)): + stride = strides[i] if check_stride(s * strides[i], + output_stride) else 1 + data, short_cuts = 
self.xception_block(data, chns[i], + [1, 1, stride]) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + + self.stride = s + self.block_point = block_point + return data + + def middle_flow(self, data): + block_num = self.bottleneck_params["middle_flow"][0] + strides = self.bottleneck_params["middle_flow"][1] + chns = self.bottleneck_params["middle_flow"][2] + strides = check_data(strides, block_num) + chns = check_data(chns, block_num) + + # params to control your flow + s = self.stride + block_point = self.block_point + output_stride = self.output_stride + with scope("middle_flow"): + for i in range(block_num): + block_point = block_point + 1 + with scope("block" + str(i + 1)): + stride = strides[i] if check_stride(s * strides[i], + output_stride) else 1 + data, short_cuts = self.xception_block( + data, chns[i], [1, 1, strides[i]], skip_conv=False) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + + self.stride = s + self.block_point = block_point + return data + + def exit_flow(self, data): + block_num = self.bottleneck_params["exit_flow"][0] + strides = self.bottleneck_params["exit_flow"][1] + chns = self.bottleneck_params["exit_flow"][2] + strides = check_data(strides, block_num) + chns = check_data(chns, block_num) + + assert (block_num == 2) + # params to control your flow + s = self.stride + block_point = self.block_point + output_stride = self.output_stride + with scope("exit_flow"): + with scope('block1'): + block_point += 1 + stride = strides[0] if check_stride(s * strides[0], + output_stride) else 1 + data, short_cuts = self.xception_block(data, chns[0], + [1, 1, stride]) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + with scope('block2'): + block_point += 1 + stride = strides[1] if check_stride(s * strides[1], + output_stride) else 1 + data, 
short_cuts = self.xception_block( + data, + chns[1], [1, 1, stride], + dilation=2, + has_skip=False, + activation_fn_in_separable_conv=True) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + + self.stride = s + self.block_point = block_point + return data + + def xception_block(self, + input, + channels, + strides=1, + filters=3, + dilation=1, + skip_conv=True, + has_skip=True, + activation_fn_in_separable_conv=False): + repeat_number = 3 + channels = check_data(channels, repeat_number) + filters = check_data(filters, repeat_number) + strides = check_data(strides, repeat_number) + data = input + results = [] + for i in range(repeat_number): + with scope('separable_conv' + str(i + 1)): + if not activation_fn_in_separable_conv: + data = relu(data) + data = seperate_conv( + data, + channels[i], + strides[i], + filters[i], + dilation=dilation) + else: + data = seperate_conv( + data, + channels[i], + strides[i], + filters[i], + dilation=dilation, + act=relu) + results.append(data) + if not has_skip: + return data, results + if skip_conv: + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=None, + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.09)) + with scope('shortcut'): + skip = bn( + conv( + input, + channels[-1], + 1, + strides[-1], + groups=1, + padding=0, + param_attr=param_attr)) + else: + skip = input + return data + skip, results + + +def Xception41_deeplab(): + model = Xception("xception_41") + return model + + +def Xception65_deeplab(): + model = Xception("xception_65") + return model + + +def Xception71_deeplab(): + model = Xception("xception_71") + return model diff --git a/ppcls/modeling/loss.py b/ppcls/modeling/loss.py new file mode 100644 index 000000000..20825f82d --- /dev/null +++ b/ppcls/modeling/loss.py @@ -0,0 +1,99 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import paddle +import paddle.fluid as fluid + +__all__ = ['CELoss', 'MixCELoss', 'GoogLeNetLoss'] + + +class Loss(object): + """ + Loss + """ + + def __init__(self, class_dim=1000, epsilon=None): + assert class_dim > 1, "class_dim=%d is not larger than 1" % (class_dim) + self._class_dim = class_dim + if epsilon and epsilon >= 0.0 and epsilon <= 1.0: + self._epsilon = epsilon + self._label_smoothing = True + else: + self._epsilon = None + self._label_smoothing = False + + def _labelsmoothing(self, target): + one_hot_target = fluid.layers.one_hot( + input=target, depth=self._class_dim) + soft_target = fluid.layers.label_smooth( + label=one_hot_target, epsilon=self._epsilon, dtype="float32") + return soft_target + + def _crossentropy(self, input, target): + if self._label_smoothing: + target = self._labelsmoothing(target) + softmax_out = fluid.layers.softmax(input, use_cudnn=False) + cost = fluid.layers.cross_entropy( + input=softmax_out, label=target, soft_label=self._label_smoothing) + avg_cost = fluid.layers.mean(cost) + return avg_cost + + def __call__(self, input, target): + pass + + +class CELoss(Loss): + """ + Cross entropy loss + """ + + def __init__(self, class_dim=1000, epsilon=None): + super(CELoss, self).__init__(class_dim, epsilon) + + def __call__(self, input, target): + cost = self._crossentropy(input, target) + return cost + + +class MixCELoss(Loss): + """ + Cross entropy loss with mix(mixup, cutmix, fixmix) + """ 
+ + def __init__(self, class_dim=1000, epsilon=None): + super(MixCELoss, self).__init__(class_dim, epsilon) + + def __call__(self, input, target0, target1, lam): + cost0 = self._crossentropy(input, target0) + cost1 = self._crossentropy(input, target1) + cost = lam * cost0 + (1.0 - lam) * cost1 + avg_cost = fluid.layers.mean(cost) + return avg_cost + + +class GoogLeNetLoss(Loss): + """ + Cross entropy loss used after googlenet + """ + + def __init__(self, class_dim=1000, epsilon=None): + super(GoogLeNetLoss, self).__init__(class_dim, epsilon) + + def __call__(self, input0, input1, input2, target): + cost0 = self._crossentropy(input0, target) + cost1 = self._crossentropy(input1, target) + cost2 = self._crossentropy(input2, target) + cost = cost0 + 0.3 * cost1 + 0.3 * cost2 + avg_cost = fluid.layers.mean(cost) + return avg_cost diff --git a/ppcls/modeling/utils.py b/ppcls/modeling/utils.py new file mode 100644 index 000000000..b239b9e41 --- /dev/null +++ b/ppcls/modeling/utils.py @@ -0,0 +1,43 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+
+import types
+from . import architectures  # FIX: bare 'import architectures' is a Py2 implicit relative import; fails under Py3 inside the ppcls.modeling package
+from difflib import SequenceMatcher
+
+
+def get_architectures():
+    """
+    get all of model architectures
+    """
+    names = []
+    for k, v in architectures.__dict__.items():
+        if isinstance(v, (types.FunctionType, type)):  # FIX: types.ClassType was removed in Py3; classes are instances of 'type'
+            names.append(k)
+    return names
+
+
+def similar_architectures(name='', thresh=0.1, topk=10):
+    """
+    inferred similar architectures
+    """
+    scores = []
+    names = get_architectures()
+    for idx, n in enumerate(names):
+        if n[:2] == '__': continue
+        score = SequenceMatcher(None, n.lower(), name.lower()).quick_ratio()
+        if score > thresh: scores.append((idx, score))
+    scores.sort(key=lambda x: x[1], reverse=True)
+    similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]]
+    return similar_names
diff --git a/ppcls/optimizer/__init__.py b/ppcls/optimizer/__init__.py
new file mode 100644
index 000000000..9a192d9a1
--- /dev/null
+++ b/ppcls/optimizer/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import optimizer
+from . import learning_rate
+
+from .optimizer import OptimizerBuilder  # FIX: implicit relative import fails on Py3
+from .learning_rate import LearningRateBuilder  # FIX: implicit relative import fails on Py3
diff --git a/ppcls/optimizer/learning_rate.py b/ppcls/optimizer/learning_rate.py
new file mode 100644
index 000000000..197f8af14
--- /dev/null
+++ b/ppcls/optimizer/learning_rate.py
@@ -0,0 +1,169 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import math + +import paddle.fluid as fluid +import paddle.fluid.layers.ops as ops +from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter + +__all__ = ['LearningRateBuilder'] + + +class Linear(object): + """ + Linear learning rate decay + + Args: + lr(float): initial learning rate + steps(int): total decay steps + end_lr(float): end learning rate, default: 0.0. 
+ """ + + def __init__(self, lr, steps, end_lr=0.0, **kwargs): + super(Linear, self).__init__() + self.lr = lr + self.steps = steps + self.end_lr = end_lr + + def __call__(self): + learning_rate = fluid.layers.polynomial_decay( + self.lr, self.steps, self.end_lr, power=1) + return learning_rate + + +class Cosine(object): + """ + Cosine learning rate decay + lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) + + Args: + lr(float): initial learning rate + step_each_epoch(int): steps each epoch + epochs(int): total training epochs + """ + + def __init__(self, lr, step_each_epoch, epochs, **kwargs): + super(Cosine, self).__init__() + self.lr = lr + self.step_each_epoch = step_each_epoch + self.epochs = epochs + + def __call__(self): + learning_rate = fluid.layers.cosine_decay( + learning_rate=self.lr, + step_each_epoch=self.step_each_epoch, + epochs=self.epochs) + return learning_rate + + +class Piecewise(object): + """ + Piecewise learning rate decay + + Args: + lr(float): initial learning rate + step_each_epoch(int): steps each epoch + decay_epochs(list): piecewise decay epochs + gamma(float): decay factor + """ + + def __init__(self, lr, step_each_epoch, decay_epochs, gamma=0.1, **kwargs): + super(Piecewise, self).__init__() + self.bd = [step_each_epoch * e for e in decay_epochs] + self.lr = [lr * (gamma**i) for i in range(len(self.bd) + 1)] + + def __call__(self): + learning_rate = fluid.layers.piecewise_decay(self.bd, self.lr) + return learning_rate + + +class CosineWarmup(object): + """ + Cosine learning rate decay with warmup + [0, warmup_epoch): linear warmup + [warmup_epoch, epochs): cosine decay + + Args: + lr(float): initial learning rate + step_each_epoch(int): steps each epoch + epochs(int): total training epochs + warmup_epoch(int): epoch num of warmup + """ + + def __init__(self, lr, step_each_epoch, epochs, warmup_epoch=5, **kwargs): + super(CosineWarmup, self).__init__() + self.lr = lr + self.step_each_epoch = step_each_epoch + self.epochs = epochs 
+ self.warmup_epoch = fluid.layers.fill_constant( + shape=[1], + value=float(warmup_epoch), + dtype='float32', + force_cpu=True) + + def __call__(self): + global_step = _decay_step_counter() + learning_rate = fluid.layers.tensor.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") + epoch = ops.floor(global_step / self.step_each_epoch) + with fluid.layers.control_flow.Switch() as switch: + with switch.case(epoch < self.warmup_epoch): + decayed_lr = self.lr * \ + (global_step / (self.step_each_epoch * self.warmup_epoch)) + fluid.layers.tensor.assign( + input=decayed_lr, output=learning_rate) + with switch.default(): + current_step = global_step - self.warmup_epoch * self.step_each_epoch + total_step = ( + self.epochs - self.warmup_epoch) * self.step_each_epoch + decayed_lr = self.lr * \ + (ops.cos(current_step * math.pi / total_step) + 1) / 2 + fluid.layers.tensor.assign( + input=decayed_lr, output=learning_rate) + + return learning_rate + + +class LearningRateBuilder(): + """ + Build learning rate variable + https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn.html + + Args: + function(str): class name of learning rate + params(dict): parameters used for init the class + """ + + def __init__(self, + function='Linear', + params={'lr': 0.1, + 'steps': 100, + 'end_lr': 0.0}): + self.function = function + self.params = params + + def __call__(self): + mod = sys.modules[__name__] + lr = getattr(mod, self.function)(**self.params)() + return lr diff --git a/ppcls/optimizer/optimizer.py b/ppcls/optimizer/optimizer.py new file mode 100644 index 000000000..707fbe9bf --- /dev/null +++ b/ppcls/optimizer/optimizer.py @@ -0,0 +1,53 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid.optimizer as pfopt +import paddle.fluid.regularizer as pfreg + +__all__ = ['OptimizerBuilder'] + + +class OptimizerBuilder(object): + """ + Build optimizer with fluid api in fluid.layers.optimizer, + such as fluid.layers.optimizer.Momentum() + https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/optimizer_cn.html + https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/regularizer_cn.html + + Args: + function(str): optimizer name of learning rate + params(dict): parameters used for init the class + regularizer (dict): parameters used for create regularization + """ + + def __init__(self, + function='Momentum', + params={'momentum': 0.9}, + regularizer=None): + self.function = function + self.params = params + # create regularizer + if regularizer is not None: + reg_func = regularizer['function'] + 'Decay' + reg_factor = regularizer['factor'] + reg = getattr(pfreg, reg_func)(reg_factor) + self.params['regularization'] = reg + + def __call__(self, learning_rate): + opt = getattr(pfopt, self.function) + return opt(learning_rate=learning_rate, **self.params) diff --git a/ppcls/test/demo.jpeg b/ppcls/test/demo.jpeg new file mode 100644 index 000000000..fd91ab95e Binary files /dev/null and b/ppcls/test/demo.jpeg differ diff --git a/ppcls/test/test_download.py b/ppcls/test/test_download.py new file mode 100644 index 000000000..465744493 --- /dev/null +++ b/ppcls/test/test_download.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020 
PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import dl +import os +import shutil + +url = "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar" + + +class DownloadDecompressTestCase(unittest.TestCase): + def setUp(self): + print("Test Download and Decompress Function...") + + def test_decompress(self): + if os.path.exists('./ResNet50_vd_pretrained'): + shutil.rmtree('./ResNet50_vd_pretrained') + if os.path.exists("./ResNet50_vd_pretrained.tar"): + shutil.rmtree("./ResNet50_vd_pretrained.tar") + + dl.decompress(dl.download(url, "./")) + self.assertTrue(os.path.exists("./ResNet50_vd_pretrained")) + shutil.rmtree('./ResNet50_vd_pretrained') + + +if __name__ == "__main__": + unittest.main() diff --git a/ppcls/test/test_imaug.py b/ppcls/test/test_imaug.py new file mode 100644 index 000000000..e11e3a98d --- /dev/null +++ b/ppcls/test/test_imaug.py @@ -0,0 +1,271 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ppcls.data.imaug import DecodeImage
+from ppcls.data.imaug import ResizeImage
+from ppcls.data.imaug import RandCropImage
+from ppcls.data.imaug import RandFlipImage
+from ppcls.data.imaug import NormalizeImage
+from ppcls.data.imaug import ToCHWImage
+
+from ppcls.data.imaug import ImageNetPolicy
+from ppcls.data.imaug import RandAugment
+from ppcls.data.imaug import Cutout
+
+from ppcls.data.imaug import HideAndSeek
+from ppcls.data.imaug import RandomErasing
+from ppcls.data.imaug import GridMask
+
+from ppcls.data.imaug import MixupOperator
+from ppcls.data.imaug import CutmixOperator
+from ppcls.data.imaug import FmixOperator
+
+from ppcls.data.imaug import transform
+
+import numpy as np
+
+fname = './test/demo.jpeg'
+size = 224
+img_mean = [0.485, 0.456, 0.406]
+img_std = [0.229, 0.224, 0.225]
+img_scale = 1.0 / 255.0
+
+decode_op = DecodeImage()
+randcrop_op = RandCropImage(size=(size, size))
+randflip_op = RandFlipImage(flip_code=1)
+
+normalize_op = NormalizeImage(
+    scale=img_scale, mean=img_mean, std=img_std, order='')
+tochw_op = ToCHWImage()
+
+data = open(fname, 'rb').read()  # FIX: JPEG must be read as bytes; text mode raises UnicodeDecodeError on Py3
+
+
+def print_function_name(func):
+    """ print function name"""
+
+    def wrapper(*args, **kwargs):
+        """ wrapper """
+        print("========Test Fuction: [%s]:" % (func.__name__))
+        func(*args, **kwargs)
+        print("========Test Fuction: [%s] done!\n" % (func.__name__))
+
+    return wrapper
+
+
+@print_function_name
+def test_decode():
+    """ test decode operator """
+    img = decode_op(data)
+    print('img shape is %s' % (str(img.shape)))
+
+
+@print_function_name
+def test_randcrop():
+    """ test randcrop operator """
+    img = decode_op(data)
+    img = randcrop_op(img)
+    assert img.shape == (size, size, 3), \
+        'image shape[%s] should be equal to [%s]' % (img.shape, (size, size, 3))
+
+
+@print_function_name
+def test_randflip():
+    """ test randflip operator """
+    import cv2
+    img =
transform(data, [decode_op, randcrop_op]) + for i in xrange(10): + flip_img = randflip_op(img) + if np.array_equal(cv2.flip(img, 1), flip_img): + break + assert np.array_equal(cv2.flip(img, 1), + flip_img), 'you should check randcrop operator' + + +@print_function_name +def test_normalize(): + """ test normalize operator """ + img = transform(data, [decode_op, randcrop_op]) + + norm_img = normalize_op(img) + assert norm_img.dtype == np.float32, 'img.dtype should be float32 after normalizing' + assert norm_img.shape == (size, size, 3), \ + 'image shape[%s] should be equal to [%s]' % (norm_img.shape, (size, size, 3)) + print('max value of the img after normalizing is : %f' % + (np.max(norm_img.flatten()))) + print('min value of the img after normalizing is : %f' % + (np.min(norm_img.flatten()))) + + +@print_function_name +def test_tochw(): + """ test tochw operator """ + img = transform(data, [decode_op, randcrop_op, randflip_op, normalize_op]) + + tochw_img = tochw_op(img) + assert tochw_img.dtype == np.float32, 'img.dtype should be float32 after tochw' + assert tochw_img.shape == (3, size, size), \ + 'image shape[%s] should be equal to [%s]' % (tochw_img.shape, (3, size, size)) + + +@print_function_name +def test_autoaugment(): + """ test autoaugment operator """ + from PIL import Image + autoaugment_op = ImageNetPolicy() + img = transform(data, [decode_op, randcrop_op]) + + aa_img = autoaugment_op(img) + assert aa_img.dtype == np.uint8, 'img.dtype should be uint8 after autoaugment' + assert aa_img.shape == (size, size, 3), \ + 'image shape[%s] should be equal to [%s]' % (aa_img.shape, (size, size, 3)) + + +@print_function_name +def test_randaugment(): + """ test randaugment operator """ + from PIL import Image + randaugment_op = RandAugment(3, 1) + img = transform(data, [decode_op, randcrop_op]) + + ra_img = randaugment_op(img) + assert ra_img.dtype == np.uint8, 'img.dtype should be uint8 after randaugment' + assert ra_img.shape == (size, size, 3), \ + 'image 
shape[%s] should be equal to [%s]' % (ra_img.shape, (size, size, 3)) + + +@print_function_name +def test_cutout(): + """ test cutout operator """ + cutout_op = Cutout() + img = transform(data, [decode_op, randcrop_op]) + + cutout_img = cutout_op(img) + assert cutout_img.dtype == np.uint8, 'img.dtype should be uint8 after cutout' + assert cutout_img.shape == (size, size, 3), \ + 'image shape[%s] should be equal to [%s]' % (cutout_img.shape, (size, size, 3)) + + +@print_function_name +def test_hideandseek(): + """ test hide and seek operator """ + img = transform( + data, [decode_op, randcrop_op, randflip_op, normalize_op, tochw_op]) + + hide_and_seek_op = HideAndSeek() + hs_img = hide_and_seek_op(img) + assert hs_img.dtype == np.float32, 'img.dtype should be float32 after hide and seek' + assert hs_img.shape == (3, size, size), \ + 'image shape[%s] should be equal to [%s]' % (hs_img.shape, (3, size, size)) + + +@print_function_name +def test_randerasing(): + """ test randerasing operator """ + img = transform( + data, [decode_op, randcrop_op, randflip_op, normalize_op, tochw_op]) + + randomerasing_op = RandomErasing() + re_img = randomerasing_op(img) + assert re_img.dtype == np.float32, 'img.dtype should be float32 after randomerasing' + assert re_img.shape == (3, size, size), \ + 'image shape[%s] should be equal to [%s]' % (re_img.shape, (3, size, size)) + + +@print_function_name +def test_gridmask(): + """ test gridmask operator """ + img = transform( + data, [decode_op, randcrop_op, randflip_op, normalize_op, tochw_op]) + + gridmask_op = GridMask( + d1=96, d2=224, rotate=360, ratio=0.6, mode=1, prob=0.8) + gm_img = gridmask_op(img) + assert gm_img.dtype == np.float32, 'img.dtype should be float32 after gridmask' + assert gm_img.shape == (3, size, size), \ + 'image shape[%s] should be equal to [%s]' % (gr_img.shape, (3, size, size)) + + +def generate_batch(batch_size=32): + """ generate_batch """ + import random + ops = [decode_op, randcrop_op, randflip_op, 
normalize_op, tochw_op] + batch = [(transform(data, ops), random.randint(0, 1000)) + for i in xrange(batch_size)] + return batch + + +def test_batch_operator(operator, batch_size): + """ test batch operator """ + batch = generate_batch(batch_size) + assert len(batch) == batch_size, \ + 'num of samples not equal to batch_size: %d != %d' % (len(batch), batch_size) + + assert len(batch[0]) == 2, \ + 'length of sample not equal to 2: %d != 2' % (len(batch[0])) + + import time + tic = time.time() + new_batch = operator(batch) + cost = time.time() - tic + print("operator cost: %.4fms" % (cost * 1000)) + + assert len(batch) == len(new_batch), \ + 'num of samples not equal: %d != %d' % (len(batch), len(new_batch)) + assert len(new_batch[0]) == 4, \ + 'length of sample not equal to 4: %d != 4' % (len(new_batch[0])) + + +@print_function_name +def test_mixup(): + """ test mixup operator """ + batch_size = 32 + mixup_op = MixupOperator(alpha=0.2) + test_batch_operator(mixup_op, batch_size) + + +@print_function_name +def test_cutmix(): + """ test cutmix operator """ + batch_size = 32 + cutmix_op = CutmixOperator(alpha=0.2) + test_batch_operator(cutmix_op, batch_size) + + +@print_function_name +def test_fmix(): + """ test fmix operator """ + batch_size = 32 + fmix_op = FmixOperator() + test_batch_operator(fmix_op, batch_size) + + +if __name__ == '__main__': + test_decode() + test_randcrop() + test_randflip() + test_normalize() + test_tochw() + + test_autoaugment() + test_randaugment() + test_cutout() + + test_hideandseek() + test_randerasing() + test_gridmask() + + test_mixup() + test_cutmix() + test_fmix() diff --git a/ppcls/test/test_super_reader.py b/ppcls/test/test_super_reader.py new file mode 100644 index 000000000..3e7ca2be3 --- /dev/null +++ b/ppcls/test/test_super_reader.py @@ -0,0 +1,116 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ppcls.data.imaug import DecodeImage +from ppcls.data.imaug import RandCropImage +from ppcls.data.imaug import RandFlipImage +from ppcls.data.imaug import NormalizeImage +from ppcls.data.imaug import ToCHWImage + +from ppcls.data.imaug import ImageNetPolicy +from ppcls.data.imaug import RandAugment +from ppcls.data.imaug import Cutout + +from ppcls.data.imaug import HideAndSeek +from ppcls.data.imaug import RandomErasing +from ppcls.data.imaug import GridMask + +from ppcls.data.imaug import MixupOperator +from ppcls.data.imaug import CutmixOperator +from ppcls.data.imaug import FmixOperator + +from ppcls.data.imaug import transform + +import numpy as np + +fname = './test/demo.jpeg' +size = 224 +img_mean = [0.485, 0.456, 0.406] +img_std = [0.229, 0.224, 0.225] +img_scale = 1.0 / 255.0 + +# normal_ops_1 +decode_op = DecodeImage() +randcrop_op = RandCropImage(size=(size, size)) + +# trans_ops +autoaugment_op = ImageNetPolicy() +randaugment_op = RandAugment(3, 1) +cutout_op = Cutout() + +# normal_ops_2 +randflip_op = RandFlipImage(flip_code=1) +normalize_op = NormalizeImage( + scale=img_scale, mean=img_mean, std=img_std, order='') +tochw_op = ToCHWImage() + +# mask_ops +hide_and_seek_op = HideAndSeek() +randomerasing_op = RandomErasing() +gridmask_op = GridMask(d1=96, d2=224, rotate=360, ratio=0.6, mode=1, prob=0.8) + +# batch_ops +mixup_op = MixupOperator(alpha=0.2) +cutmix_op = CutmixOperator(alpha=0.2) +fmix_op = 
FmixOperator() + + +def fakereader(): + """ fake reader """ + import random + data = open(fname).read() + + def wrapper(): + while True: + yield (data, random.randint(0, 1000)) + + return wrapper + + +def superreader(batch_size=32): + """ super reader """ + normal_ops_1 = [decode_op, randcrop_op] + normal_ops_2 = [randflip_op, normalize_op, tochw_op] + + trans_ops = [autoaugment_op, randaugment_op, cutout_op] + trans_ops_p = [0.2, 0.3, 0.5] + mask_ops = [hide_and_seek_op, randomerasing_op, gridmask_op] + mask_ops_p = [0.1, 0.6, 0.3] + batch_ops = [mixup_op, cutmix_op, fmix_op] + batch_ops_p = [0.3, 0.3, 0.4] + + reader = fakereader() + + def wrapper(): + batch = [] + for idx, sample in enumerate(reader()): + img, label = sample + ops = normal_ops_1 + [np.random.choice(trans_ops, p=trans_ops_p)] +\ + normal_ops_2 + [np.random.choice(mask_ops, p=mask_ops_p)] + img = transform(img, ops) + batch.append((img, label)) + if (idx + 1) % batch_size == 0: + batch = transform( + batch, [np.random.choice( + batch_ops, p=batch_ops_p)]) + yield batch + batch = [] + + return wrapper + + +if __name__ == '__main__': + reader = superreader(32) + for batch in reader(): + print(len(batch), len(batch[0]), batch[0][0].shape, batch[0][1:]) diff --git a/ppcls/utils/__init__.py b/ppcls/utils/__init__.py new file mode 100644 index 000000000..3df1283ae --- /dev/null +++ b/ppcls/utils/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from . import environment +from . import model_zoo +from . import misc +from . import logger + +from .save_load import init_model, save_model +from .config import get_config +from .misc import AverageMeter diff --git a/ppcls/utils/check.py b/ppcls/utils/check.py new file mode 100644 index 000000000..3c3eadb42 --- /dev/null +++ b/ppcls/utils/check.py @@ -0,0 +1,129 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +import paddle.fluid as fluid + +from ppcls.modeling import similar_architectures +from ppcls.utils import logger + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.7 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." \ + + try: + fluid.require_version('1.7.0') + except Exception as e: + logger.error(err) + sys.exit(1) + + +def check_gpu(): + """ + Log error and exit when using paddlepaddle cpu version. + """ + err = "You are using paddlepaddle cpu version! Please try to " \ + "install paddlepaddle-gpu to run model on GPU." 
+ + try: + assert fluid.is_compiled_with_cuda() + except AssertionError: + logger.error(err) + sys.exit(1) + + + def check_architecture(architecture): + """ + check architecture and recommend similar architectures + """ + assert isinstance(architecture, str), \ + ("the type of architecture({}) should be str". format(architecture)) + similar_names = similar_architectures(architecture) + model_list = ', '.join(similar_names) + err = "{} is not exist! Maybe you want: [{}]" \ + "".format(architecture, model_list) + + try: + assert architecture in similar_names + except AssertionError: + logger.error(err) + sys.exit(1) + + + def check_mix(architecture, use_mix=False): + """ + check mix parameter + """ + err = "Cannot use mix processing in GoogLeNet, " \ + "please set use_mix = False." + try: + if architecture == "GoogLeNet": assert use_mix == False + except AssertionError: + logger.error(err) + sys.exit(1) + + + def check_classes_num(classes_num): + """ + check classes_num + """ + err = "classes_num({}) should be a positive integer " \ + "and larger than 1".format(classes_num) + try: + assert isinstance(classes_num, int) + assert classes_num > 1 + except AssertionError: + logger.error(err) + sys.exit(1) + + + def check_data_dir(path): + """ + check data_dir + """ + import os + err = "Data path({}) does not exist, please provide a valid path" \ + "".format(path) + try: + assert os.path.isdir(path) + except AssertionError: + logger.error(err) + sys.exit(1) + + + def check_function_params(config, key): + """ + check specify config + """ + k_config = config.get(key) + assert k_config is not None, \ + ('{} is required in config'.format(key)) + + assert k_config.get('function'), \ + ('function is required {} config'.format(key)) + params = k_config.get('params') + assert params is not None, \ + ('params is required in {} config'.format(key)) + assert isinstance(params, dict), \ + ('the params in {} config should be a dict'.format(key)) diff --git a/ppcls/utils/config.py b/ppcls/utils/config.py new file 
mode 100644 index 000000000..98812b3c5 --- /dev/null +++ b/ppcls/utils/config.py @@ -0,0 +1,201 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import yaml +from ppcls.utils import check +from ppcls.utils import logger + +__all__ = ['get_config'] + +CONFIG_SECS = ['TRAIN', 'VALID', 'OPTIMIZER', 'LEARNING_RATE'] + + +class AttrDict(dict): + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + return + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.FullLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def print_dict(d, delimiter=0): + """ + Recursively visualize a dict and + indenting acrrording by the relationship of keys. 
+ """ + for k, v in d.items(): + if k in CONFIG_SECS: + logger.info("-" * 60) + + if isinstance(v, dict): + logger.info("{}{} : ".format(delimiter * " ", k)) + print_dict(v, delimiter + 4) + elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict): + logger.info("{}{} : ".format(delimiter * " ", k)) + for value in v: + print_dict(value, delimiter + 4) + else: + logger.info("{}{} : {}".format(delimiter * " ", k, v)) + + if k in CONFIG_SECS: + logger.info("-" * 60) + + +def print_config(config): + """ + visualize configs + + Arguments: + config: configs + """ + + copyright = "PaddleCLS is powered by PaddlePaddle" + ad = "https://github.com/PaddlePaddle/PaddleCLS" + + logger.info("\n" * 2) + logger.info(copyright) + logger.info(ad) + + print_dict(config) + + logger.info("-" * 60) + + +def check_config(config): + """ + Check config + """ + check.check_version() + + mode = config.get('mode', 'train') + check.check_gpu() + + architecture = config.get('architecture') + check.check_architecture(architecture) + + use_mix = config.get('use_mix') + check.check_mix(architecture, use_mix) + + classes_num = config.get('classes_num') + check.check_classes_num(classes_num) + + if mode.lower() == 'train': + check.check_function_params(config, 'LEARNING_RATE') + check.check_function_params(config, 'OPTIMIZER') + + +def override(dl, ks, v): + """ + Recursively replace dict of list + + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + assert ks[0] in dl, ('{} is not exist in 
{}'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=[]): + """ + Recursively override the config + + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + + Returns: + config(dict): replaced config + """ + for opt in options: + assert isinstance(opt, str), \ + ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain " \ + "a = to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + + return config + + +def get_config(fname, overrides=[], show=True): + """ + Read config from file + """ + assert os.path.exists(fname), \ + ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + if show: print_config(config) + if len(overrides) > 0: + override_config(config, overrides) + print_config(config) + check_config(config) + return config diff --git a/ppcls/utils/environment.py b/ppcls/utils/environment.py new file mode 100644 index 000000000..e6061ecea --- /dev/null +++ b/ppcls/utils/environment.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +import os +import paddle +import paddle.fluid as fluid +import paddle.fluid.framework as pff + +trainers_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) +trainer_id = int(os.environ.get("PADDLE_TRAINER_ID", 0)) + + +def place(): + gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) + return fluid.CUDAPlace(gpu_id) + + +def places(): + """ + Returns available running places, the numbers are usually + indicated by 'export CUDA_VISIBLE_DEVICES= ' + Args: + """ + + if trainers_num <= 1: + return pff.cuda_places() + else: + return place() diff --git a/ppcls/utils/misc.py b/ppcls/utils/misc.py new file mode 100644 index 000000000..5cffa1f67 --- /dev/null +++ b/ppcls/utils/misc.py @@ -0,0 +1,47 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +__all__ = ['AverageMeter'] + + +class AverageMeter(object): + """ + Computes and stores the average and current value + """ + + def __init__(self, name='', fmt=':f', avg=False): + self.name = name + self.fmt = fmt + self.avg_flag = avg + self.reset() + + def reset(self): + """ reset """ + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + """ update """ + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '[{name}: {val' + self.fmt + '}]' + if self.avg_flag: + fmtstr += '[{name}(avg): {avg' + self.fmt + '}]' + return fmtstr.format(**self.__dict__) diff --git a/ppcls/utils/model_zoo.py b/ppcls/utils/model_zoo.py new file mode 100644 index 000000000..543b2558e --- /dev/null +++ b/ppcls/utils/model_zoo.py @@ -0,0 +1,179 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + + import os + import shutil + import requests + import tqdm + import tarfile + import zipfile + + from ppcls.modeling import similar_architectures + from ppcls.utils.check import check_architecture + from ppcls.utils import logger + + __all__ = ['get'] + + DOWNLOAD_RETRY_LIMIT = 3 + + + class UrlError(Exception): + """ UrlError + """ + + def __init__(self, url='', code=''): + message = "Downloading from {} failed with code {}!".format(url, code) + super(UrlError, self).__init__(message) + + + class ModelNameError(Exception): + """ ModelNameError + """ + + def __init__(self, message='', architecture=''): + similar_names = similar_architectures(architecture) + model_list = ', '.join(similar_names) + message += '\n{} is not exist. \nMaybe you want: [{}]'.format( + architecture, model_list) + super(ModelNameError, self).__init__(message) + + + class RetryError(Exception): + """ RetryError + """ + + def __init__(self, url='', times=''): + message = "Download from {} failed. Retry({}) limit reached".format( + url, times) + super(RetryError, self).__init__(message) + + + def _get_url(architecture): + prefix = "https://paddle-imagenet-models-name.bj.bcebos.com/" + fname = architecture + "_pretrained.tar" + return prefix + fname + + + def _move_and_merge_tree(src, dst): + """ + Move src directory to dst, if dst is already exists, + merge src to dst + """ + if not os.path.exists(dst): + shutil.move(src, dst) + elif os.path.isfile(src): + shutil.move(src, dst) + else: + for fp in os.listdir(src): + src_fp = os.path.join(src, fp) + dst_fp = os.path.join(dst, fp) + if os.path.isdir(src_fp): + if os.path.isdir(dst_fp): + _move_and_merge_tree(src_fp, dst_fp) + else: + shutil.move(src_fp, dst_fp) + elif os.path.isfile(src_fp) and \ + not os.path.isfile(dst_fp): + shutil.move(src_fp, dst_fp) + + + def _download(url, path): + """ + Download from url, save to path. 
+ url (str): download url + path (str): download to given path + """ + if not os.path.exists(path): + os.makedirs(path) + + fname = os.path.split(url)[-1] + fullname = os.path.join(path, fname) + retry_cnt = 0 + + while not os.path.exists(fullname): + if retry_cnt < DOWNLOAD_RETRY_LIMIT: + retry_cnt += 1 + else: + raise RetryError(url, DOWNLOAD_RETRY_LIMIT) + + logger.info("Downloading {} from {}".format(fname, url)) + + req = requests.get(url, stream=True) + if req.status_code != 200: + raise UrlError(url, req.status_code) + + # For protecting download interupted, download to + # tmp_fullname firstly, move tmp_fullname to fullname + # after download finished + tmp_fullname = fullname + "_tmp" + total_size = req.headers.get('content-length') + with open(tmp_fullname, 'wb') as f: + if total_size: + for chunk in tqdm.tqdm( + req.iter_content(chunk_size=1024), + total=(int(total_size) + 1023) // 1024, + unit='KB'): + f.write(chunk) + else: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + shutil.move(tmp_fullname, fullname) + + return fullname + + +def _decompress(fname): + """ + Decompress for zip and tar file + """ + logger.info("Decompressing {}...".format(fname)) + + # For protecting decompressing interupted, + # decompress to fpath_tmp directory firstly, if decompress + # successed, move decompress files to fpath and delete + # fpath_tmp and remove download compress file. 
+ fpath = os.path.split(fname)[0] + fpath_tmp = os.path.join(fpath, 'tmp') + if os.path.isdir(fpath_tmp): + shutil.rmtree(fpath_tmp) + os.makedirs(fpath_tmp) + + if fname.find('tar') >= 0: + with tarfile.open(fname) as tf: + tf.extractall(path=fpath_tmp) + elif fname.find('zip') >= 0: + with zipfile.ZipFile(fname) as zf: + zf.extractall(path=fpath_tmp) + else: + raise TypeError("Unsupport compress file type {}".format(fname)) + + for f in os.listdir(fpath_tmp): + src_dir = os.path.join(fpath_tmp, f) + dst_dir = os.path.join(fpath, f) + _move_and_merge_tree(src_dir, dst_dir) + + shutil.rmtree(fpath_tmp) + os.remove(fname) + + +def get(architecture, path, decompress=True): + check_architecture(architecture) + url = _get_url(architecture) + fname = _download(url, path) + if decompress: _decompress(fname) + logger.info("download {} finished ".format(fname)) diff --git a/ppcls/utils/save_load.py b/ppcls/utils/save_load.py new file mode 100644 index 000000000..ac2593936 --- /dev/null +++ b/ppcls/utils/save_load.py @@ -0,0 +1,124 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tempfile +import shutil + +import paddle +import paddle.fluid as fluid + +from ppcls.utils import logger + +__all__ = ['init_model', 'save_model'] + + +def _mkdir_if_not_exist(path): + """ + mkdir if not exists + """ + if not os.path.exists(os.path.join(path)): + os.makedirs(os.path.join(path)) + + +def _load_state(path): + print("path: ", path) + if os.path.exists(path + '.pdopt'): + # XXX another hack to ignore the optimizer state + tmp = tempfile.mkdtemp() + dst = os.path.join(tmp, os.path.basename(os.path.normpath(path))) + shutil.copy(path + '.pdparams', dst + '.pdparams') + state = fluid.io.load_program_state(dst) + shutil.rmtree(tmp) + else: + print("path: ", path) + state = fluid.io.load_program_state(path) + return state + + +def load_params(exe, prog, path, ignore_params=[]): + """ + Load model from the given path. + Args: + exe (fluid.Executor): The fluid.Executor object. + prog (fluid.Program): load weight to which Program object. + path (string): URL string or loca model path. + ignore_params (list): ignore variable to load when finetuning. + It can be specified by finetune_exclude_pretrained_params + and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md + """ + if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): + raise ValueError("Model pretrain path {} does not " + "exists.".format(path)) + + logger.info('Loading parameters from {}...'.format(path)) + + ignore_set = set() + state = _load_state(path) + + # ignore the parameter which mismatch the shape + # between the model and pretrain weight. 
+ all_var_shape = {} + for block in prog.blocks: + for param in block.all_parameters(): + all_var_shape[param.name] = param.shape + ignore_set.update([ + name for name, shape in all_var_shape.items() + if name in state and shape != state[name].shape + ]) + + if ignore_params: + import re + all_var_names = [var.name for var in prog.list_vars()] + ignore_list = filter( + lambda var: any([re.match(name, var) for name in ignore_params]), + all_var_names) + ignore_set.update(list(ignore_list)) + + if len(ignore_set) > 0: + for k in ignore_set: + if k in state: + logger.warning('variable {} not used'.format(k)) + del state[k] + fluid.io.set_program_state(prog, state) + + + def init_model(config, program, exe): + """ + load model from checkpoint or pretrained_model + """ + checkpoints = config.get('checkpoints') + if checkpoints and os.path.exists(checkpoints): + fluid.load(program, checkpoints, exe) + logger.info("Finish initing model from {}".format(checkpoints)) + return + + pretrained_model = config.get('pretrained_model') + if pretrained_model and os.path.exists(pretrained_model): + load_params(exe, program, pretrained_model) + logger.info("Finish initing model from {}".format(pretrained_model)) + + + def save_model(program, model_path, epoch_id, prefix='ppcls'): + """ + save model to the target path + """ + model_path = os.path.join(model_path, str(epoch_id)) + _mkdir_if_not_exist(model_path) + model_prefix = os.path.join(model_path, prefix) + fluid.save(program, model_prefix) + logger.info("Already save model in {}".format(model_path)) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..13ad95e59 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +pillow +tqdm +PyYAML diff --git a/tools/download.py b/tools/download.py new file mode 100644 index 000000000..ab88fb44f --- /dev/null +++ b/tools/download.py @@ -0,0 +1,41 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import sys +import argparse + +sys.path.append("../") +from ppcls import model_zoo + + +def parse_args(): + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + parser.add_argument('-a', '--architecture', type=str, default='ResNet50') + parser.add_argument('-p', '--path', type=str, default='./pretrained/') + parser.add_argument('-d', '--decompress', type=str2bool, default=True) + + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + model_zoo.get(args.architecture, args.path, args.decompress) + + +if __name__ == '__main__': + main() diff --git a/tools/eval.py b/tools/eval.py new file mode 100644 index 000000000..9fae3044c --- /dev/null +++ b/tools/eval.py @@ -0,0 +1,84 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import argparse + +import paddle +import paddle.fluid as fluid + +import program + +from ppcls.data import Reader +import ppcls.utils.environment as env +from ppcls.utils.config import get_config +from ppcls.utils.save_load import init_model, save_model +from ppcls.utils import logger + +from paddle.fluid.incubate.fleet.collective import fleet +from paddle.fluid.incubate.fleet.base import role_maker + + +def parse_args(): + parser = argparse.ArgumentParser("PaddleClas eval script") + parser.add_argument( + '-c', + '--config', + type=str, + default='configs/eval.yaml', + help='config file path') + parser.add_argument( + '-o', + '--override', + action='append', + default=[], + help='config options to be overridden') + + args = parser.parse_args() + return args + + +def main(args): + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + + config = get_config(args.config, overrides=args.override, show=True) + place = env.place() + + startup_prog = fluid.Program() + valid_prog = fluid.Program() + valid_dataloader, valid_fetchs = program.build( + config, valid_prog, startup_prog, is_train=False) + valid_prog = valid_prog.clone(for_test=True) + + exe = fluid.Executor(place) + exe.run(startup_prog) + + init_model(config, valid_prog, exe) + + valid_reader = Reader(config, 'valid')() + valid_dataloader.set_sample_list_generator(valid_reader, place) + compiled_valid_prog = program.compile(config, valid_prog) + + program.run(valid_dataloader, exe, compiled_valid_prog, valid_fetchs, 0, + 'valid') + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/tools/export_model.py b/tools/export_model.py new file mode 100644 index 000000000..e2db8b184 --- /dev/null +++ b/tools/export_model.py @@ -0,0 +1,77 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import numpy as np + +from ppcls.modeling import architectures +import paddle.fluid as fluid + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--model", type=str) + parser.add_argument("-p", "--pretrained_model", type=str) + parser.add_argument("-o", "--output_path", type=str) + + return parser.parse_args() + + +def create_input(): + image = fluid.data( + name='image', shape=[None, 3, 224, 224], dtype='float32') + return image + + +def create_model(args, model, input, class_dim=1000): + if args.model == "GoogLeNet": + out, _, _ = model.net(input=input, class_dim=class_dim) + else: + out = model.net(input=input, class_dim=class_dim) + out = fluid.layers.softmax(out) + return out + + +def main(): + args = parse_args() + + model = architectures.__dict__[args.model]() + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + startup_prog = fluid.Program() + infer_prog = fluid.Program() + + with fluid.program_guard(infer_prog, startup_prog): + with fluid.unique_name.guard(): + image = create_input() + out = create_model(args, model, image) + + infer_prog = infer_prog.clone(for_test=True) + fluid.load( + program=infer_prog, model_path=args.pretrained_model, executor=exe) + + fluid.io.save_inference_model( + dirname=args.output_path, + feeded_var_names=[image.name], + main_program=infer_prog, + target_vars=out, + executor=exe, + model_filename='model', + 
params_filename='params') + + +if __name__ == "__main__": + main() diff --git a/tools/infer/cpp_infer.py b/tools/infer/cpp_infer.py new file mode 100644 index 000000000..665dfa162 --- /dev/null +++ b/tools/infer/cpp_infer.py @@ -0,0 +1,103 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import utils +import argparse +import numpy as np + +from paddle.fluid.core import PaddleTensor +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import create_paddle_predictor + + +def parse_args(): + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--image_file", type=str) + parser.add_argument("-m", "--model_file", type=str) + parser.add_argument("-p", "--params_file", type=str) + parser.add_argument("-b", "--max_batch_size", type=int, default=1) + parser.add_argument("--use_gpu", type=str2bool, default=True) + parser.add_argument("--ir_optim", type=str2bool, default=True) + parser.add_argument("--use_tensorrt", type=str2bool, default=False) + + return parser.parse_args() + + +def create_predictor(args): + config = AnalysisConfig(args.model_file, args.params_file) + if args.use_gpu: + config.enable_use_gpu(1000, 0) + else: + config.disable_gpu() + + config.switch_ir_optim(args.ir_optim) # default true + if args.use_tensorrt: + config.enable_tensorrt_engine( + precision_mode=AnalysisConfig.Precision.Float32, + 
max_batch_size=args.max_batch_size) + predictor = create_paddle_predictor(config) + + return predictor + + +def create_operators(): + size = 224 + img_mean = [0.485, 0.456, 0.406] + img_std = [0.229, 0.224, 0.225] + img_scale = 1.0 / 255.0 + + decode_op = utils.DecodeImage() + resize_op = utils.ResizeImage(resize_short=256) + crop_op = utils.CropImage(size=(size, size)) + normalize_op = utils.NormalizeImage( + scale=img_scale, mean=img_mean, std=img_std) + totensor_op = utils.ToTensor() + + return [decode_op, resize_op, crop_op, normalize_op, totensor_op] + + +def preprocess(fname, ops): + data = open(fname).read() + for op in ops: + data = op(data) + + return data + + +def postprocess(outputs, topk=5): + output = outputs[0] + prob = output.as_ndarray().flatten() + index = prob.argsort(axis=0)[-topk:][::-1].astype('int32') + return zip(index, prob[index]) + + +def main(): + args = parse_args() + operators = create_operators() + predictor = create_predictor(args) + + data = preprocess(args.image_file, operators) + inputs = [PaddleTensor(data.copy())] + outputs = predictor.run(inputs) + probs = postprocess(outputs) + + for idx, prob in probs: + print("class id: {:d}, probability: {:.4f}".format(idx, prob)) + + +if __name__ == "__main__": + main() diff --git a/tools/infer/infer.py b/tools/infer/infer.py new file mode 100644 index 000000000..9a3b8fff9 --- /dev/null +++ b/tools/infer/infer.py @@ -0,0 +1,119 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# NOTE(review): module-level imports (utils, argparse, numpy as np,
# paddle.fluid as fluid, ppcls.modeling.architectures) are unchanged at the
# top of this file.


def parse_args():
    """Parse CLI arguments: image path, architecture name, weights, device."""

    def str2bool(v):
        # Accept "true"/"t"/"1" (case-insensitive) as True; anything else is False.
        return v.lower() in ("true", "t", "1")

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--image_file", type=str)
    parser.add_argument("-m", "--model", type=str)
    parser.add_argument("-p", "--pretrained_model", type=str)
    parser.add_argument("--use_gpu", type=str2bool, default=True)

    return parser.parse_args()


def create_predictor(args):
    """Build an inference program for ``args.model`` and load its weights.

    Returns:
        (executor, inference_program, feed_names, fetch_names)
    """

    def create_input():
        image = fluid.data(
            name='image', shape=[None, 3, 224, 224], dtype='float32')
        return image

    def create_model(args, model, input, class_dim=1000):
        # GoogLeNet's net() returns three outputs (main + two auxiliary
        # heads); only the main head is used for prediction.
        if args.model == "GoogLeNet":
            out, _, _ = model.net(input=input, class_dim=class_dim)
        else:
            out = model.net(input=input, class_dim=class_dim)
            out = fluid.layers.softmax(out)
        return out

    model = architectures.__dict__[args.model]()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()

    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            image = create_input()
            out = create_model(args, model, image)

    # Freeze the graph for inference before loading the persistable weights.
    infer_prog = infer_prog.clone(for_test=True)
    fluid.load(
        program=infer_prog, model_path=args.pretrained_model, executor=exe)

    return exe, infer_prog, [image.name], [out.name]


def create_operators():
    """Build the standard ImageNet eval preprocessing pipeline.

    decode -> resize(short side 256) -> center crop 224 -> normalize -> CHW.
    """
    size = 224
    img_mean = [0.485, 0.456, 0.406]
    img_std = [0.229, 0.224, 0.225]
    img_scale = 1.0 / 255.0

    decode_op = utils.DecodeImage()
    resize_op = utils.ResizeImage(resize_short=256)
    crop_op = utils.CropImage(size=(size, size))
    normalize_op = utils.NormalizeImage(
        scale=img_scale, mean=img_mean, std=img_std)
    totensor_op = utils.ToTensor()

    return [decode_op, resize_op, crop_op, normalize_op, totensor_op]


def preprocess(fname, ops):
    """Read the raw image file and run it through the preprocessing ops."""
    # Open in binary mode: DecodeImage expects raw bytes. Text mode returns
    # str under Python 3, which breaks np.frombuffer / cv2.imdecode.
    with open(fname, 'rb') as f:
        data = f.read()
    for op in ops:
        data = op(data)

    return data


def postprocess(outputs, topk=5):
    """Return an iterator of the top-k (class_id, probability) pairs,
    sorted by descending probability."""
    output = outputs[0]
    prob = np.array(output).flatten()
    index = prob.argsort(axis=0)[-topk:][::-1].astype('int32')
    return zip(index, prob[index])


def main():
    """Run single-image inference and print the top-k predictions."""
    args = parse_args()
    operators = create_operators()
    exe, program, feed_names, fetch_names = create_predictor(args)

    data = preprocess(args.image_file, operators)
    outputs = exe.run(program,
                      feed={feed_names[0]: data},
                      fetch_list=fetch_names,
                      return_numpy=False)
    probs = postprocess(outputs)

    for idx, prob in probs:
        print("class id: {:d}, probability: {:.4f}".format(idx, prob))


if __name__ == "__main__":
    main()
# NOTE(review): module-level imports (utils, argparse, numpy as np,
# paddle.fluid as fluid) are unchanged at the top of this file.


def parse_args():
    """Parse CLI arguments: image path, saved-model dir/files, device."""

    def str2bool(v):
        # Accept "true"/"t"/"1" (case-insensitive) as True; anything else is False.
        return v.lower() in ("true", "t", "1")

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--image_file", type=str)
    parser.add_argument("-d", "--model_dir", type=str)
    parser.add_argument("-m", "--model_file", type=str)
    parser.add_argument("-p", "--params_file", type=str)
    parser.add_argument("--use_gpu", type=str2bool, default=True)

    return parser.parse_args()


def create_predictor(args):
    """Load a saved inference model and wrap it in a CompiledProgram.

    Returns:
        (executor, compiled_program, feed_names, fetch_variables)
    """
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    # load_inference_model returns the program, the names of the feed
    # variables, and the fetch variables themselves.
    [program, feed_names, fetch_lists] = fluid.io.load_inference_model(
        args.model_dir,
        exe,
        model_filename=args.model_file,
        params_filename=args.params_file)
    compiled_program = fluid.compiler.CompiledProgram(program)

    return exe, compiled_program, feed_names, fetch_lists


def create_operators():
    """Build the standard ImageNet eval preprocessing pipeline.

    decode -> resize(short side 256) -> center crop 224 -> normalize -> CHW.
    """
    size = 224
    img_mean = [0.485, 0.456, 0.406]
    img_std = [0.229, 0.224, 0.225]
    img_scale = 1.0 / 255.0

    decode_op = utils.DecodeImage()
    resize_op = utils.ResizeImage(resize_short=256)
    crop_op = utils.CropImage(size=(size, size))
    normalize_op = utils.NormalizeImage(
        scale=img_scale, mean=img_mean, std=img_std)
    totensor_op = utils.ToTensor()

    return [decode_op, resize_op, crop_op, normalize_op, totensor_op]


def preprocess(fname, ops):
    """Read the raw image file and run it through the preprocessing ops."""
    # Open in binary mode: DecodeImage expects raw bytes. Text mode returns
    # str under Python 3, which breaks np.frombuffer / cv2.imdecode.
    with open(fname, 'rb') as f:
        data = f.read()
    for op in ops:
        data = op(data)

    return data


def postprocess(outputs, topk=5):
    """Return an iterator of the top-k (class_id, probability) pairs,
    sorted by descending probability."""
    output = outputs[0]
    prob = np.array(output).flatten()
    index = prob.argsort(axis=0)[-topk:][::-1].astype('int32')
    return zip(index, prob[index])


def main():
    """Run single-image inference and print the top-k predictions."""
    args = parse_args()
    operators = create_operators()
    exe, program, feed_names, fetch_lists = create_predictor(args)

    data = preprocess(args.image_file, operators)
    outputs = exe.run(program,
                      feed={feed_names[0]: data},
                      fetch_list=fetch_lists,
                      return_numpy=False)
    probs = postprocess(outputs)

    for idx, prob in probs:
        print("class id: {:d}, probability: {:.4f}".format(idx, prob))


if __name__ == "__main__":
    main()
# Preprocessing operators for single-image inference. Each operator is a
# small callable; a pipeline is a plain list applied left to right.
# (Module imports — cv2, numpy as np — are unchanged at the top of the file.)


class DecodeImage(object):
    """Decode raw JPEG/PNG bytes into an HWC uint8 array.

    By default the decoded BGR image (OpenCV convention) is flipped to RGB.
    """

    def __init__(self, to_rgb=True):
        self.to_rgb = to_rgb

    def __call__(self, img):
        raw = np.frombuffer(img, dtype='uint8')
        decoded = cv2.imdecode(raw, 1)
        if self.to_rgb:
            assert decoded.shape[2] == 3, 'invalid shape of image[%s]' % (
                decoded.shape)
            decoded = decoded[:, :, ::-1]  # BGR -> RGB via channel reversal
        return decoded


class ResizeImage(object):
    """Resize so the shorter side equals ``resize_short``, keeping aspect."""

    def __init__(self, resize_short=None):
        self.resize_short = resize_short

    def __call__(self, img):
        height, width = img.shape[:2]
        scale = float(self.resize_short) / min(width, height)
        new_size = (int(round(width * scale)), int(round(height * scale)))
        return cv2.resize(img, new_size)


class CropImage(object):
    """Center-crop an HWC image to ``size`` (an int or a (w, h) pair)."""

    def __init__(self, size):
        self.size = (size, size) if type(size) is int else size

    def __call__(self, img):
        target_w, target_h = self.size
        img_h, img_w = img.shape[:2]
        left = (img_w - target_w) // 2
        top = (img_h - target_h) // 2
        return img[top:top + target_h, left:left + target_w, :]


class NormalizeImage(object):
    """Scale pixel values, then normalize per channel: (x*scale - mean)/std."""

    def __init__(self, scale=None, mean=None, std=None):
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        channel_shape = (1, 1, 3)  # broadcast over H and W
        mean = [0.485, 0.456, 0.406] if mean is None else mean
        std = [0.229, 0.224, 0.225] if std is None else std
        self.mean = np.array(mean).reshape(channel_shape).astype('float32')
        self.std = np.array(std).reshape(channel_shape).astype('float32')

    def __call__(self, img):
        return (img.astype('float32') * self.scale - self.mean) / self.std


class ToTensor(object):
    """HWC -> CHW, then prepend a batch axis of size 1."""

    def __init__(self):
        pass

    def __call__(self, img):
        chw = img.transpose((2, 0, 1))
        return np.expand_dims(chw, axis=0)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time

from collections import OrderedDict

import paddle
import paddle.fluid as fluid

from ppcls.optimizer import LearningRateBuilder
from ppcls.optimizer import OptimizerBuilder

from ppcls.modeling import architectures
from ppcls.modeling.loss import CELoss
from ppcls.modeling.loss import MixCELoss
from ppcls.modeling.loss import GoogLeNetLoss
from ppcls.utils.misc import AverageMeter
from ppcls.utils import logger

from paddle.fluid.incubate.fleet.collective import fleet
from paddle.fluid.incubate.fleet.collective import DistributedStrategy


def create_feeds(image_shape, mix=None):
    """
    Create feeds as model input

    Args:
        image_shape(list[int]): model input shape, such as [3, 224, 224]
        mix(bool): whether to use mix(include mixup, cutmix, fmix)

    Returns:
        feeds(dict): dict of model input variables
    """
    feeds = OrderedDict()
    feeds['image'] = fluid.data(
        name="feed_image", shape=[None] + image_shape, dtype="float32")
    if mix:
        # Mix training feeds two label sets plus the mixing coefficient
        # instead of a single hard label.
        feeds['feed_y_a'] = fluid.data(
            name="feed_y_a", shape=[None, 1], dtype="int64")
        feeds['feed_y_b'] = fluid.data(
            name="feed_y_b", shape=[None, 1], dtype="int64")
        feeds['feed_lam'] = fluid.data(
            name="feed_lam", shape=[None, 1], dtype="float32")
    else:
        feeds['label'] = fluid.data(
            name="feed_label", shape=[None, 1], dtype="int64")

    return feeds


def create_dataloader(feeds):
    """
    Create a dataloader with model input variables

    Args:
        feeds(dict): dict of model input variables

    Returns:
        dataloader(fluid dataloader):
    """
    trainer_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    # Use a smaller buffer per trainer in distributed mode to bound memory.
    capacity = 64 if trainer_num <= 1 else 8
    dataloader = fluid.io.DataLoader.from_generator(
        feed_list=feeds,
        capacity=capacity,
        use_double_buffer=True,
        iterable=True)

    return dataloader


def create_model(name, image, classes_num):
    """
    Create a model

    Args:
        name(str): model name, such as ResNet50
        image(variable): model input variable
        classes_num(int): num of classes

    Returns:
        out(variable): model output variable
    """
    model = architectures.__dict__[name]()
    out = model.net(input=image, class_dim=classes_num)
    return out


def create_loss(out,
                feeds,
                architecture,
                classes_num=1000,
                epsilon=None,
                mix=False):
    """
    Create a loss for optimization, such as:
        1. CrossEntropy loss
        2. CrossEntropy loss with label smoothing
        3. CrossEntropy loss with mix(mixup, cutmix, fmix)
        4. CrossEntropy loss with label smoothing and (mixup, cutmix, fmix)
        5. GoogLeNet loss

    Args:
        out(variable): model output variable
        feeds(dict): dict of model input variables
        architecture(str): model name, such as ResNet50
        classes_num(int): num of classes
        epsilon(float): parameter for label smoothing, 0.0 <= epsilon <= 1.0
        mix(bool): whether to use mix(include mixup, cutmix, fmix)

    Returns:
        loss(variable): loss variable
    """
    if architecture == "GoogLeNet":
        assert len(out) == 3, "GoogLeNet should have 3 outputs"
        loss = GoogLeNetLoss(class_dim=classes_num, epsilon=epsilon)
        target = feeds['label']
        return loss(out[0], out[1], out[2], target)

    if mix:
        loss = MixCELoss(class_dim=classes_num, epsilon=epsilon)
        feed_y_a = feeds['feed_y_a']
        feed_y_b = feeds['feed_y_b']
        feed_lam = feeds['feed_lam']
        return loss(out, feed_y_a, feed_y_b, feed_lam)
    else:
        loss = CELoss(class_dim=classes_num, epsilon=epsilon)
        target = feeds['label']
        return loss(out, target)


def create_metric(out, feeds, topk=5, classes_num=1000):
    """
    Create measures of model accuracy, such as top1 and top5

    Args:
        out(variable): model output variable
        feeds(dict): dict of model input variables(included label)
        topk(int): usually top5
        classes_num(int): num of classes

    Returns:
        fetchs(dict): dict of measures
    """
    fetchs = OrderedDict()
    label = feeds['label']
    softmax_out = fluid.layers.softmax(out, use_cudnn=False)
    top1 = fluid.layers.accuracy(softmax_out, label=label, k=1)
    fetchs['top1'] = (top1, AverageMeter('top1', ':2.4f', True))
    # Clamp k so accuracy() is never asked for more classes than exist.
    k = min(topk, classes_num)
    # Fix: do not shadow the `topk` parameter with the accuracy variable.
    acc_topk = fluid.layers.accuracy(softmax_out, label=label, k=k)
    topk_name = 'top{}'.format(k)
    fetchs[topk_name] = (acc_topk, AverageMeter(topk_name, ':2.4f', True))

    return fetchs


def create_fetchs(out,
                  feeds,
                  architecture,
                  topk=5,
                  classes_num=1000,
                  epsilon=None,
                  mix=False):
    """
    Create fetchs as model outputs(included loss and measures),
    will call create_loss and create_metric(unless mix is used,
    in which case accuracy cannot be computed from soft labels).

    Args:
        out(variable): model output variable
        feeds(dict): dict of model input variables(included label)
        architecture(str): model name, such as ResNet50
        topk(int): usually top5
        classes_num(int): num of classes
        epsilon(float): parameter for label smoothing, 0.0 <= epsilon <= 1.0
        mix(bool): whether to use mix(include mixup, cutmix, fmix)

    Returns:
        fetchs(dict): dict of model outputs(included loss and measures)
    """
    fetchs = OrderedDict()
    loss = create_loss(out, feeds, architecture, classes_num, epsilon, mix)
    fetchs['loss'] = (loss, AverageMeter('loss', ':2.4f', True))
    if not mix:
        metric = create_metric(out, feeds, topk, classes_num)
        fetchs.update(metric)

    return fetchs


def create_optimizer(config):
    """
    Create an optimizer using config, usually including
    learning rate and regularization.

    Args:
        config(dict): such as
        {
            'LEARNING_RATE':
                {'function': 'Cosine',
                 'params': {'lr': 0.1}
                },
            'OPTIMIZER':
                {'function': 'Momentum',
                 'params': {'momentum': 0.9},
                 'regularizer':
                    {'function': 'L2', 'factor': 0.0001}
                }
        }

    Returns:
        an optimizer instance
    """
    # create learning_rate instance; schedules need the per-epoch step count
    lr_config = config['LEARNING_RATE']
    lr_config['params'].update({
        'epochs': config['epochs'],
        'step_each_epoch':
        config['total_images'] // config['TRAIN']['batch_size'],
    })
    lr = LearningRateBuilder(**lr_config)()

    # create optimizer instance
    opt_config = config['OPTIMIZER']
    opt = OptimizerBuilder(**opt_config)
    return opt(lr)


def dist_optimizer(config, optimizer):
    """
    Create a distributed optimizer based on a normal optimizer

    Args:
        config(dict):
        optimizer(): a normal optimizer

    Returns:
        optimizer: a distributed optimizer
    """
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 3
    exec_strategy.num_iteration_per_drop_scope = 10

    dist_strategy = DistributedStrategy()
    dist_strategy.nccl_comm_num = 1
    dist_strategy.fuse_all_reduce_ops = True
    dist_strategy.exec_strategy = exec_strategy
    optimizer = fleet.distributed_optimizer(optimizer, strategy=dist_strategy)

    return optimizer


def build(config, main_prog, startup_prog, is_train=True):
    """
    Build a program using a model and an optimizer
        1. create feeds
        2. create a dataloader
        3. create a model
        4. create fetchs
        5. create an optimizer (train only)

    Args:
        config(dict): config
        main_prog(): main program
        startup_prog(): startup program
        is_train(bool): train or valid

    Returns:
        dataloader(): a bridge between the model and the data
        fetchs(dict): dict of model outputs(included loss and measures)
    """
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            # mix augmentation only applies to training
            use_mix = config.get('use_mix') and is_train
            feeds = create_feeds(config.image_shape, mix=use_mix)
            dataloader = create_dataloader(feeds.values())
            out = create_model(config.architecture, feeds['image'],
                               config.classes_num)
            fetchs = create_fetchs(
                out,
                feeds,
                config.architecture,
                config.topk,
                config.classes_num,
                epsilon=config.get('ls_epsilon'),
                mix=use_mix)
            if is_train:
                optimizer = create_optimizer(config)
                lr = optimizer._global_learning_rate()
                fetchs['lr'] = (lr, AverageMeter('lr', ':f', False))
                optimizer = dist_optimizer(config, optimizer)
                optimizer.minimize(fetchs['loss'][0])

    return dataloader, fetchs


def compile(config, program, loss_name=None):
    """
    Compile the program

    Args:
        config(dict): config
        program(): the program which is wrapped by
        loss_name(str): loss name

    Returns:
        compiled_program(): a compiled program
    """
    build_strategy = fluid.compiler.BuildStrategy()
    # Fusion flags kept disabled pending verification; re-enable from config
    # once validated:
    #build_strategy.fuse_bn_act_ops = config.get("fuse_bn_act_ops")
    #build_strategy.fuse_elewise_add_act_ops = config.get("fuse_elewise_add_act_ops")
    exec_strategy = fluid.ExecutionStrategy()

    exec_strategy.num_threads = 1
    exec_strategy.num_iteration_per_drop_scope = 10

    compiled_program = fluid.CompiledProgram(program).with_data_parallel(
        loss_name=loss_name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    return compiled_program


def run(dataloader, exe, program, fetchs, epoch=0, mode='train'):
    """
    Feed data to the model and fetch the measures and loss

    Args:
        dataloader(fluid dataloader):
        exe(): executor
        program(): compiled program to run
        fetchs(dict): dict of measures and the loss
        epoch(int): epoch of training or validation
        mode(str): 'train' or 'valid', log only

    Returns:
    """
    fetch_list = [f[0] for f in fetchs.values()]
    metric_list = [f[1] for f in fetchs.values()]
    batch_time = AverageMeter('cost', ':6.3f')
    tic = time.time()
    for idx, batch in enumerate(dataloader()):
        metrics = exe.run(program=program, feed=batch, fetch_list=fetch_list)
        batch_time.update(time.time() - tic)
        tic = time.time()
        for i, m in enumerate(metrics):
            metric_list[i].update(m[0], len(batch[0]))
        fetchs_str = ''.join([str(m) for m in metric_list] + [str(batch_time)])
        logger.info("[epoch:%3d][%s][step:%4d]%s" %
                    (epoch, mode, idx, fetchs_str))
# NOTE(review): module-level imports (argparse, os, sys, paddle, paddle.fluid,
# program, ppcls.*, fleet, role_maker) are unchanged at the top of this file.


def parse_args():
    """Build and parse the training CLI: a config path plus repeatable
    ``-o key=value`` overrides."""
    parser = argparse.ArgumentParser("PaddleClas train script")
    parser.add_argument(
        '-c',
        '--config',
        type=str,
        default='configs/ResNet/ResNet18_vd.yaml',
        help='config file path')
    parser.add_argument(
        '-o',
        '--override',
        action='append',
        default=[],
        help='config options to be overridden')
    return parser.parse_args()


def main(args):
    """Train (and optionally validate) a classifier described by the config."""
    # Join the collective fleet before building any program.
    cloud_role = role_maker.PaddleCloudRoleMaker(is_collective=True)
    fleet.init(cloud_role)

    config = get_config(args.config, overrides=args.override, show=True)
    place = env.place()

    startup_prog = fluid.Program()
    train_prog = fluid.Program()

    train_dataloader, train_fetchs = program.build(
        config, train_prog, startup_prog, is_train=True)

    if config.validate:
        valid_prog = fluid.Program()
        valid_dataloader, valid_fetchs = program.build(
            config, valid_prog, startup_prog, is_train=False)
        # Freeze the validation graph.
        valid_prog = valid_prog.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Load pretrained weights / resume checkpoint if configured.
    init_model(config, train_prog, exe)

    train_reader = Reader(config, 'train')()
    train_dataloader.set_sample_list_generator(train_reader, place)

    if config.validate:
        valid_reader = Reader(config, 'valid')()
        valid_dataloader.set_sample_list_generator(valid_reader, place)
        compiled_valid_prog = program.compile(config, valid_prog)

    # fleet already wraps the distributed training program.
    compiled_train_prog = fleet.main_program
    for epoch_id in range(config.epochs):
        program.run(train_dataloader, exe, compiled_train_prog, train_fetchs,
                    epoch_id, 'train')

        if config.validate and epoch_id % config.valid_interval == 0:
            program.run(valid_dataloader, exe, compiled_valid_prog,
                        valid_fetchs, epoch_id, 'valid')

        if epoch_id % config.save_interval == 0:
            model_path = os.path.join(config.model_save_dir,
                                      config.architecture)
            save_model(train_prog, model_path, epoch_id)


if __name__ == '__main__':
    args = parse_args()
    main(args)