add MobileViTv3
parent
df31d808fc
commit
dc4fdba0ab
|
@ -78,6 +78,7 @@ from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
|
||||||
from .model_zoo.cvt import CvT_13_224, CvT_13_384, CvT_21_224, CvT_21_384, CvT_W24_384
|
from .model_zoo.cvt import CvT_13_224, CvT_13_384, CvT_21_224, CvT_21_384, CvT_W24_384
|
||||||
from .model_zoo.micronet import MicroNet_M0, MicroNet_M1, MicroNet_M2, MicroNet_M3
|
from .model_zoo.micronet import MicroNet_M0, MicroNet_M1, MicroNet_M2, MicroNet_M3
|
||||||
from .model_zoo.mobilenext import MobileNeXt_x0_35, MobileNeXt_x0_5, MobileNeXt_x0_75, MobileNeXt_x1_0, MobileNeXt_x1_4
|
from .model_zoo.mobilenext import MobileNeXt_x0_35, MobileNeXt_x0_5, MobileNeXt_x0_75, MobileNeXt_x1_0, MobileNeXt_x1_4
|
||||||
|
from .model_zoo.mobilevit_v3 import MobileViTv3_XXS, MobileViTv3_XS, MobileViTv3_S, MobileViTv3_x0_5, MobileViTv3_x0_75, MobileViTv3_x1_0
|
||||||
|
|
||||||
from .variant_models.resnet_variant import ResNet50_last_stage_stride1
|
from .variant_models.resnet_variant import ResNet50_last_stage_stride1
|
||||||
from .variant_models.resnet_variant import ResNet50_adaptive_max_pool2d
|
from .variant_models.resnet_variant import ResNet50_adaptive_max_pool2d
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,152 @@
|
||||||
|
# global configs
|
||||||
|
Global:
|
||||||
|
checkpoints: null
|
||||||
|
pretrained_model: null
|
||||||
|
output_dir: ./output/
|
||||||
|
device: gpu
|
||||||
|
save_interval: 1
|
||||||
|
eval_during_train: True
|
||||||
|
eval_interval: 1
|
||||||
|
epochs: 300
|
||||||
|
print_batch_step: 10
|
||||||
|
use_visualdl: False
|
||||||
|
# used for static mode and model export
|
||||||
|
image_shape: [3, 256, 256]
|
||||||
|
save_inference_dir: ./inference
|
||||||
|
use_dali: False
|
||||||
|
|
||||||
|
# mixed precision training
|
||||||
|
AMP:
|
||||||
|
scale_loss: 65536
|
||||||
|
use_dynamic_loss_scaling: True
|
||||||
|
# O1: mixed fp16
|
||||||
|
level: O1
|
||||||
|
|
||||||
|
# model ema
|
||||||
|
EMA:
|
||||||
|
decay: 0.9995
|
||||||
|
|
||||||
|
# model architecture
|
||||||
|
Arch:
|
||||||
|
name: MobileViTv3_S
|
||||||
|
class_num: 1000
|
||||||
|
dropout: 0.1
|
||||||
|
|
||||||
|
# loss function config for training/eval process
|
||||||
|
Loss:
|
||||||
|
Train:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
epsilon: 0.1
|
||||||
|
Eval:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: AdamW
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
epsilon: 1e-8
|
||||||
|
weight_decay: 0.01
|
||||||
|
lr:
|
||||||
|
# for 8 cards
|
||||||
|
name: Cosine
|
||||||
|
learning_rate: 0.002
|
||||||
|
eta_min: 0.0002
|
||||||
|
warmup_epoch: 1 # 3000 iterations
|
||||||
|
warmup_start_lr: 0.0002
|
||||||
|
# by_epoch: True
|
||||||
|
|
||||||
|
# data loader for train and eval
|
||||||
|
DataLoader:
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: MultiScaleDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- RandCropImage:
|
||||||
|
size: 256
|
||||||
|
interpolation: bilinear
|
||||||
|
use_log_aspect: True
|
||||||
|
- RandFlipImage:
|
||||||
|
flip_code: 1
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
# support to specify width and height respectively:
|
||||||
|
# scales: [(256,256), (160,160), (192,192), (224,224), (288,288), (320,320)]
|
||||||
|
sampler:
|
||||||
|
name: MultiScaleSampler
|
||||||
|
scales: [256, 160, 192, 224, 288, 320]
|
||||||
|
# first_bs: batch size for the first image resolution in the scales list
|
||||||
|
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
||||||
|
first_bs: 48
|
||||||
|
divided_factor: 32
|
||||||
|
is_training: True
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: ImageNetDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: False
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
sampler:
|
||||||
|
name: DistributedBatchSampler
|
||||||
|
batch_size: 48
|
||||||
|
drop_last: False
|
||||||
|
shuffle: False
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
|
||||||
|
Infer:
|
||||||
|
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
|
||||||
|
batch_size: 10
|
||||||
|
transforms:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
- ToCHWImage:
|
||||||
|
PostProcess:
|
||||||
|
name: Topk
|
||||||
|
topk: 5
|
||||||
|
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
Train:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
||||||
|
Eval:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
|
@ -0,0 +1,152 @@
|
||||||
|
# global configs
|
||||||
|
Global:
|
||||||
|
checkpoints: null
|
||||||
|
pretrained_model: null
|
||||||
|
output_dir: ./output/
|
||||||
|
device: gpu
|
||||||
|
save_interval: 1
|
||||||
|
eval_during_train: True
|
||||||
|
eval_interval: 1
|
||||||
|
epochs: 300
|
||||||
|
print_batch_step: 10
|
||||||
|
use_visualdl: False
|
||||||
|
# used for static mode and model export
|
||||||
|
image_shape: [3, 256, 256]
|
||||||
|
save_inference_dir: ./inference
|
||||||
|
use_dali: False
|
||||||
|
|
||||||
|
# mixed precision training
|
||||||
|
AMP:
|
||||||
|
scale_loss: 65536
|
||||||
|
use_dynamic_loss_scaling: True
|
||||||
|
# O1: mixed fp16
|
||||||
|
level: O1
|
||||||
|
|
||||||
|
# model ema
|
||||||
|
EMA:
|
||||||
|
decay: 0.9995
|
||||||
|
|
||||||
|
# model architecture
|
||||||
|
Arch:
|
||||||
|
name: MobileViTv3_XS
|
||||||
|
class_num: 1000
|
||||||
|
dropout: 0.1
|
||||||
|
|
||||||
|
# loss function config for training/eval process
|
||||||
|
Loss:
|
||||||
|
Train:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
epsilon: 0.1
|
||||||
|
Eval:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: AdamW
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
epsilon: 1e-8
|
||||||
|
weight_decay: 0.01
|
||||||
|
lr:
|
||||||
|
# for 8 cards
|
||||||
|
name: Cosine
|
||||||
|
learning_rate: 0.002
|
||||||
|
eta_min: 0.0002
|
||||||
|
warmup_epoch: 1 # 3000 iterations
|
||||||
|
warmup_start_lr: 0.0002
|
||||||
|
# by_epoch: True
|
||||||
|
|
||||||
|
# data loader for train and eval
|
||||||
|
DataLoader:
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: MultiScaleDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- RandCropImage:
|
||||||
|
size: 256
|
||||||
|
interpolation: bilinear
|
||||||
|
use_log_aspect: True
|
||||||
|
- RandFlipImage:
|
||||||
|
flip_code: 1
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
# support to specify width and height respectively:
|
||||||
|
# scales: [(256,256), (160,160), (192,192), (224,224), (288,288), (320,320)]
|
||||||
|
sampler:
|
||||||
|
name: MultiScaleSampler
|
||||||
|
scales: [256, 160, 192, 224, 288, 320]
|
||||||
|
# first_bs: batch size for the first image resolution in the scales list
|
||||||
|
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
||||||
|
first_bs: 48
|
||||||
|
divided_factor: 32
|
||||||
|
is_training: True
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: ImageNetDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: False
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
sampler:
|
||||||
|
name: DistributedBatchSampler
|
||||||
|
batch_size: 48
|
||||||
|
drop_last: False
|
||||||
|
shuffle: False
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
|
||||||
|
Infer:
|
||||||
|
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
|
||||||
|
batch_size: 10
|
||||||
|
transforms:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
- ToCHWImage:
|
||||||
|
PostProcess:
|
||||||
|
name: Topk
|
||||||
|
topk: 5
|
||||||
|
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
Train:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
||||||
|
Eval:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
|
@ -0,0 +1,152 @@
|
||||||
|
# global configs
|
||||||
|
Global:
|
||||||
|
checkpoints: null
|
||||||
|
pretrained_model: null
|
||||||
|
output_dir: ./output/
|
||||||
|
device: gpu
|
||||||
|
save_interval: 1
|
||||||
|
eval_during_train: True
|
||||||
|
eval_interval: 1
|
||||||
|
epochs: 300
|
||||||
|
print_batch_step: 10
|
||||||
|
use_visualdl: False
|
||||||
|
# used for static mode and model export
|
||||||
|
image_shape: [3, 256, 256]
|
||||||
|
save_inference_dir: ./inference
|
||||||
|
use_dali: False
|
||||||
|
|
||||||
|
# mixed precision training
|
||||||
|
AMP:
|
||||||
|
scale_loss: 65536
|
||||||
|
use_dynamic_loss_scaling: True
|
||||||
|
# O1: mixed fp16
|
||||||
|
level: O1
|
||||||
|
|
||||||
|
# model ema
|
||||||
|
EMA:
|
||||||
|
decay: 0.9995
|
||||||
|
|
||||||
|
# model architecture
|
||||||
|
Arch:
|
||||||
|
name: MobileViTv3_XXS
|
||||||
|
class_num: 1000
|
||||||
|
dropout: 0.05
|
||||||
|
|
||||||
|
# loss function config for training/eval process
|
||||||
|
Loss:
|
||||||
|
Train:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
epsilon: 0.1
|
||||||
|
Eval:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: AdamW
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
epsilon: 1e-8
|
||||||
|
weight_decay: 0.01
|
||||||
|
lr:
|
||||||
|
# for 8 cards
|
||||||
|
name: Cosine
|
||||||
|
learning_rate: 0.002
|
||||||
|
eta_min: 0.0002
|
||||||
|
warmup_epoch: 1 # 3000 iterations
|
||||||
|
warmup_start_lr: 0.0002
|
||||||
|
# by_epoch: True
|
||||||
|
|
||||||
|
# data loader for train and eval
|
||||||
|
DataLoader:
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: MultiScaleDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- RandCropImage:
|
||||||
|
size: 256
|
||||||
|
interpolation: bilinear
|
||||||
|
use_log_aspect: True
|
||||||
|
- RandFlipImage:
|
||||||
|
flip_code: 1
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
# support to specify width and height respectively:
|
||||||
|
# scales: [(256,256), (160,160), (192,192), (224,224), (288,288), (320,320)]
|
||||||
|
sampler:
|
||||||
|
name: MultiScaleSampler
|
||||||
|
scales: [256, 160, 192, 224, 288, 320]
|
||||||
|
# first_bs: batch size for the first image resolution in the scales list
|
||||||
|
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
||||||
|
first_bs: 48
|
||||||
|
divided_factor: 32
|
||||||
|
is_training: True
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: ImageNetDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: False
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
sampler:
|
||||||
|
name: DistributedBatchSampler
|
||||||
|
batch_size: 48
|
||||||
|
drop_last: False
|
||||||
|
shuffle: False
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
|
||||||
|
Infer:
|
||||||
|
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
|
||||||
|
batch_size: 10
|
||||||
|
transforms:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
- ToCHWImage:
|
||||||
|
PostProcess:
|
||||||
|
name: Topk
|
||||||
|
topk: 5
|
||||||
|
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
Train:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
||||||
|
Eval:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
|
@ -0,0 +1,152 @@
|
||||||
|
# global configs
|
||||||
|
Global:
|
||||||
|
checkpoints: null
|
||||||
|
pretrained_model: null
|
||||||
|
output_dir: ./output/
|
||||||
|
device: gpu
|
||||||
|
save_interval: 1
|
||||||
|
eval_during_train: True
|
||||||
|
eval_interval: 1
|
||||||
|
epochs: 300
|
||||||
|
print_batch_step: 10
|
||||||
|
use_visualdl: False
|
||||||
|
# used for static mode and model export
|
||||||
|
image_shape: [3, 256, 256]
|
||||||
|
save_inference_dir: ./inference
|
||||||
|
use_dali: False
|
||||||
|
|
||||||
|
# mixed precision training
|
||||||
|
AMP:
|
||||||
|
scale_loss: 65536
|
||||||
|
use_dynamic_loss_scaling: True
|
||||||
|
# O1: mixed fp16
|
||||||
|
level: O1
|
||||||
|
|
||||||
|
# model ema
|
||||||
|
EMA:
|
||||||
|
decay: 0.9995
|
||||||
|
|
||||||
|
# model architecture
|
||||||
|
Arch:
|
||||||
|
name: MobileViTv3_x0_5
|
||||||
|
class_num: 1000
|
||||||
|
classifier_dropout: 0.
|
||||||
|
|
||||||
|
# loss function config for training/eval process
|
||||||
|
Loss:
|
||||||
|
Train:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
epsilon: 0.1
|
||||||
|
Eval:
|
||||||
|
- CELoss:
|
||||||
|
weight: 1.0
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: AdamW
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
epsilon: 1e-8
|
||||||
|
weight_decay: 0.01
|
||||||
|
lr:
|
||||||
|
# for 8 cards
|
||||||
|
name: Cosine
|
||||||
|
learning_rate: 0.002
|
||||||
|
eta_min: 0.0002
|
||||||
|
warmup_epoch: 1 # 3000 iterations
|
||||||
|
warmup_start_lr: 0.0002
|
||||||
|
# by_epoch: True
|
||||||
|
|
||||||
|
# data loader for train and eval
|
||||||
|
DataLoader:
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: MultiScaleDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- RandCropImage:
|
||||||
|
size: 256
|
||||||
|
interpolation: bilinear
|
||||||
|
use_log_aspect: True
|
||||||
|
- RandFlipImage:
|
||||||
|
flip_code: 1
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
# support to specify width and height respectively:
|
||||||
|
# scales: [(256,256), (160,160), (192,192), (224,224), (288,288), (320,320)]
|
||||||
|
sampler:
|
||||||
|
name: MultiScaleSampler
|
||||||
|
scales: [256, 160, 192, 224, 288, 320]
|
||||||
|
# first_bs: batch size for the first image resolution in the scales list
|
||||||
|
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
||||||
|
first_bs: 48
|
||||||
|
divided_factor: 32
|
||||||
|
is_training: True
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: ImageNetDataset
|
||||||
|
image_root: ./dataset/ILSVRC2012/
|
||||||
|
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||||
|
transform_ops:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: False
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
sampler:
|
||||||
|
name: DistributedBatchSampler
|
||||||
|
batch_size: 48
|
||||||
|
drop_last: False
|
||||||
|
shuffle: False
|
||||||
|
loader:
|
||||||
|
num_workers: 4
|
||||||
|
use_shared_memory: True
|
||||||
|
|
||||||
|
Infer:
|
||||||
|
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
|
||||||
|
batch_size: 10
|
||||||
|
transforms:
|
||||||
|
- DecodeImage:
|
||||||
|
to_rgb: True
|
||||||
|
channel_first: False
|
||||||
|
- ResizeImage:
|
||||||
|
resize_short: 288
|
||||||
|
interpolation: bilinear
|
||||||
|
- CropImage:
|
||||||
|
size: 256
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1.0/255.0
|
||||||
|
mean: [0.0, 0.0, 0.0]
|
||||||
|
std: [1.0, 1.0, 1.0]
|
||||||
|
order: ''
|
||||||
|
- ToCHWImage:
|
||||||
|
PostProcess:
|
||||||
|
name: Topk
|
||||||
|
topk: 5
|
||||||
|
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
Train:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
||||||
|
Eval:
|
||||||
|
- TopkAcc:
|
||||||
|
topk: [1, 5]
|
Loading…
Reference in New Issue