commit 897760f073
@@ -48,7 +48,7 @@ from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, R
 from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
 from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
 from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
-from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384, ViT_huge_patch16_224, ViT_huge_patch32_384
+from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384
 from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384
 from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
 from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
@@ -38,10 +38,6 @@ MODEL_URLS = {
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams",
     "ViT_large_patch32_384":
     "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams",
-    "ViT_huge_patch16_224":
-    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch16_224_pretrained.pdparams",
-    "ViT_huge_patch32_384":
-    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch32_384_pretrained.pdparams"
 }
 
 __all__ = list(MODEL_URLS.keys())
@@ -460,36 +456,3 @@ def ViT_large_patch32_384(pretrained=False, use_ssld=False, **kwargs):
         MODEL_URLS["ViT_large_patch32_384"],
         use_ssld=use_ssld)
     return model
-
-
-def ViT_huge_patch16_224(pretrained=False, use_ssld=False, **kwargs):
-    model = VisionTransformer(
-        patch_size=16,
-        embed_dim=1280,
-        depth=32,
-        num_heads=16,
-        mlp_ratio=4,
-        **kwargs)
-    _load_pretrained(
-        pretrained,
-        model,
-        MODEL_URLS["ViT_huge_patch16_224"],
-        use_ssld=use_ssld)
-    return model
-
-
-def ViT_huge_patch32_384(pretrained=False, use_ssld=False, **kwargs):
-    model = VisionTransformer(
-        img_size=384,
-        patch_size=32,
-        embed_dim=1280,
-        depth=32,
-        num_heads=16,
-        mlp_ratio=4,
-        **kwargs)
-    _load_pretrained(
-        pretrained,
-        model,
-        MODEL_URLS["ViT_huge_patch32_384"],
-        use_ssld=use_ssld)
-    return model
@@ -1,130 +0,0 @@
-# global configs
-Global:
-  checkpoints: null
-  pretrained_model: null
-  output_dir: ./output/
-  device: gpu
-  save_interval: 1
-  eval_during_train: True
-  eval_interval: 1
-  epochs: 120
-  print_batch_step: 10
-  use_visualdl: False
-  # used for static mode and model export
-  image_shape: [3, 224, 224]
-  save_inference_dir: ./inference
-
-# model architecture
-Arch:
-  name: ViT_huge_patch16_224
-  class_num: 1000
-
-# loss function config for traing/eval process
-Loss:
-  Train:
-    - CELoss:
-        weight: 1.0
-  Eval:
-    - CELoss:
-        weight: 1.0
-
-
-Optimizer:
-  name: Momentum
-  momentum: 0.9
-  lr:
-    name: Piecewise
-    learning_rate: 0.1
-    decay_epochs: [30, 60, 90]
-    values: [0.1, 0.01, 0.001, 0.0001]
-  regularizer:
-    name: 'L2'
-    coeff: 0.0001
-
-
-# data loader for train and eval
-DataLoader:
-  Train:
-    dataset:
-      name: ImageNetDataset
-      image_root: ./dataset/ILSVRC2012/
-      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
-      transform_ops:
-        - DecodeImage:
-            to_rgb: True
-            channel_first: False
-        - RandCropImage:
-            size: 224
-        - RandFlipImage:
-            flip_code: 1
-        - NormalizeImage:
-            scale: 1.0/255.0
-            mean: [0.5, 0.5, 0.5]
-            std: [0.5, 0.5, 0.5]
-            order: ''
-
-    sampler:
-      name: DistributedBatchSampler
-      batch_size: 64
-      drop_last: False
-      shuffle: True
-    loader:
-      num_workers: 4
-      use_shared_memory: True
-
-  Eval:
-    dataset:
-      name: ImageNetDataset
-      image_root: ./dataset/ILSVRC2012/
-      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
-      transform_ops:
-        - DecodeImage:
-            to_rgb: True
-            channel_first: False
-        - ResizeImage:
-            resize_short: 256
-        - CropImage:
-            size: 224
-        - NormalizeImage:
-            scale: 1.0/255.0
-            mean: [0.5, 0.5, 0.5]
-            std: [0.5, 0.5, 0.5]
-            order: ''
-    sampler:
-      name: DistributedBatchSampler
-      batch_size: 64
-      drop_last: False
-      shuffle: False
-    loader:
-      num_workers: 4
-      use_shared_memory: True
-
-Infer:
-  infer_imgs: docs/images/whl/demo.jpg
-  batch_size: 10
-  transforms:
-    - DecodeImage:
-        to_rgb: True
-        channel_first: False
-    - ResizeImage:
-        resize_short: 256
-    - CropImage:
-        size: 224
-    - NormalizeImage:
-        scale: 1.0/255.0
-        mean: [0.5, 0.5, 0.5]
-        std: [0.5, 0.5, 0.5]
-        order: ''
-    - ToCHWImage:
-  PostProcess:
-    name: Topk
-    topk: 5
-    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
-
-Metric:
-  Train:
-    - TopkAcc:
-        topk: [1, 5]
-  Eval:
-    - TopkAcc:
-        topk: [1, 5]
@@ -1,130 +0,0 @@
-# global configs
-Global:
-  checkpoints: null
-  pretrained_model: null
-  output_dir: ./output/
-  device: gpu
-  save_interval: 1
-  eval_during_train: True
-  eval_interval: 1
-  epochs: 120
-  print_batch_step: 10
-  use_visualdl: False
-  # used for static mode and model export
-  image_shape: [3, 384, 384]
-  save_inference_dir: ./inference
-
-# model architecture
-Arch:
-  name: ViT_huge_patch32_384
-  class_num: 1000
-
-# loss function config for traing/eval process
-Loss:
-  Train:
-    - CELoss:
-        weight: 1.0
-  Eval:
-    - CELoss:
-        weight: 1.0
-
-
-Optimizer:
-  name: Momentum
-  momentum: 0.9
-  lr:
-    name: Piecewise
-    learning_rate: 0.1
-    decay_epochs: [30, 60, 90]
-    values: [0.1, 0.01, 0.001, 0.0001]
-  regularizer:
-    name: 'L2'
-    coeff: 0.0001
-
-
-# data loader for train and eval
-DataLoader:
-  Train:
-    dataset:
-      name: ImageNetDataset
-      image_root: ./dataset/ILSVRC2012/
-      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
-      transform_ops:
-        - DecodeImage:
-            to_rgb: True
-            channel_first: False
-        - RandCropImage:
-            size: 384
-        - RandFlipImage:
-            flip_code: 1
-        - NormalizeImage:
-            scale: 1.0/255.0
-            mean: [0.5, 0.5, 0.5]
-            std: [0.5, 0.5, 0.5]
-            order: ''
-
-    sampler:
-      name: DistributedBatchSampler
-      batch_size: 64
-      drop_last: False
-      shuffle: True
-    loader:
-      num_workers: 4
-      use_shared_memory: True
-
-  Eval:
-    dataset:
-      name: ImageNetDataset
-      image_root: ./dataset/ILSVRC2012/
-      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
-      transform_ops:
-        - DecodeImage:
-            to_rgb: True
-            channel_first: False
-        - ResizeImage:
-            resize_short: 384
-        - CropImage:
-            size: 384
-        - NormalizeImage:
-            scale: 1.0/255.0
-            mean: [0.5, 0.5, 0.5]
-            std: [0.5, 0.5, 0.5]
-            order: ''
-    sampler:
-      name: DistributedBatchSampler
-      batch_size: 64
-      drop_last: False
-      shuffle: False
-    loader:
-      num_workers: 4
-      use_shared_memory: True
-
-Infer:
-  infer_imgs: docs/images/whl/demo.jpg
-  batch_size: 10
-  transforms:
-    - DecodeImage:
-        to_rgb: True
-        channel_first: False
-    - ResizeImage:
-        resize_short: 384
-    - CropImage:
-        size: 384
-    - NormalizeImage:
-        scale: 1.0/255.0
-        mean: [0.5, 0.5, 0.5]
-        std: [0.5, 0.5, 0.5]
-        order: ''
-    - ToCHWImage:
-  PostProcess:
-    name: Topk
-    topk: 5
-    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
-
-Metric:
-  Train:
-    - TopkAcc:
-        topk: [1, 5]
-  Eval:
-    - TopkAcc:
-        topk: [1, 5]
@@ -1,52 +0,0 @@
-===========================train_params===========================
-model_name:ViT_huge_patch16_224
-python:python3.7
-gpu_list:0|0,1
--o Global.device:gpu
--o Global.auto_cast:null
--o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
--o Global.output_dir:./output/
--o DataLoader.Train.sampler.batch_size:8
--o Global.pretrained_model:null
-train_model_name:latest
-train_infer_img_dir:./dataset/ILSVRC2012/val
-null:null
-##
-trainer:norm_train
-norm_train:tools/train.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch16_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
-pact_train:null
-fpgm_train:null
-distill_train:null
-null:null
-null:null
-##
-===========================eval_params===========================
-eval:tools/eval.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch16_224.yaml
-null:null
-##
-===========================infer_params==========================
--o Global.save_inference_dir:./inference
--o Global.pretrained_model:
-norm_export:tools/export_model.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch16_224.yaml
-quant_export:null
-fpgm_export:null
-distill_export:null
-kl_quant:null
-export2:null
-pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch16_224_pretrained.pdparams
-infer_model:../inference/
-infer_export:True
-infer_quant:Fasle
-inference:python/predict_cls.py -c configs/inference_cls.yaml
--o Global.use_gpu:True|False
--o Global.enable_mkldnn:True|False
--o Global.cpu_num_threads:1|6
--o Global.batch_size:1|16
--o Global.use_tensorrt:True|False
--o Global.use_fp16:True|False
--o Global.inference_model_dir:../inference
--o Global.infer_imgs:../dataset/ILSVRC2012/val
--o Global.save_log_path:null
--o Global.benchmark:True
-null:null
-null:null
@@ -1,52 +0,0 @@
-===========================train_params===========================
-model_name:ViT_huge_patch32_384
-python:python3.7
-gpu_list:0|0,1
--o Global.device:gpu
--o Global.auto_cast:null
--o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
--o Global.output_dir:./output/
--o DataLoader.Train.sampler.batch_size:8
--o Global.pretrained_model:null
-train_model_name:latest
-train_infer_img_dir:./dataset/ILSVRC2012/val
-null:null
-##
-trainer:norm_train
-norm_train:tools/train.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch32_384.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
-pact_train:null
-fpgm_train:null
-distill_train:null
-null:null
-null:null
-##
-===========================eval_params===========================
-eval:tools/eval.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch32_384.yaml
-null:null
-##
-===========================infer_params==========================
--o Global.save_inference_dir:./inference
--o Global.pretrained_model:
-norm_export:tools/export_model.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch32_384.yaml
-quant_export:null
-fpgm_export:null
-distill_export:null
-kl_quant:null
-export2:null
-pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch32_384_pretrained.pdparams
-infer_model:../inference/
-infer_export:True
-infer_quant:Fasle
-inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.resize_short=384 -o PreProcess.transform_ops.1.CropImage.size=384
--o Global.use_gpu:True|False
--o Global.enable_mkldnn:True|False
--o Global.cpu_num_threads:1|6
--o Global.batch_size:1|16
--o Global.use_tensorrt:True|False
--o Global.use_fp16:True|False
--o Global.inference_model_dir:../inference
--o Global.infer_imgs:../dataset/ILSVRC2012/val
--o Global.save_log_path:null
--o Global.benchmark:True
-null:null
-null:null
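
The hunks above drop the two ViT_huge factory functions, their MODEL_URLS entries, their training configs, and their TIPC test files, while leaving the smaller ViT variants untouched. A minimal sketch follows (illustrative only, not part of this commit): the surviving factories are used the same way the removed ViT_huge ones were, and the 384 input size and 1000-class output are taken from the deleted config files; the dummy-batch call is an assumption for demonstration.

import paddle
# Factory kept by this commit; the removed ViT_huge_* factories followed the same pattern.
from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_large_patch32_384

# pretrained=False keeps random weights; pretrained=True would fetch the weights from MODEL_URLS.
model = ViT_large_patch32_384(pretrained=False)
x = paddle.randn([1, 3, 384, 384])  # dummy NCHW batch at this variant's 384x384 resolution (illustrative)
logits = model(x)                   # should come out as [1, 1000]: ImageNet-1k class scores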