commit
897760f073
|
@ -48,7 +48,7 @@ from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, R
|
|||
from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
|
||||
from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
|
||||
from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
|
||||
from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384, ViT_huge_patch16_224, ViT_huge_patch32_384
|
||||
from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384
|
||||
from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384
|
||||
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
|
||||
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
|
||||
|
|
|
@ -38,10 +38,6 @@ MODEL_URLS = {
|
|||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams",
|
||||
"ViT_large_patch32_384":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams",
|
||||
"ViT_huge_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch16_224_pretrained.pdparams",
|
||||
"ViT_huge_patch32_384":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch32_384_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
@ -460,36 +456,3 @@ def ViT_large_patch32_384(pretrained=False, use_ssld=False, **kwargs):
|
|||
MODEL_URLS["ViT_large_patch32_384"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ViT_huge_patch16_224(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = VisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=1280,
|
||||
depth=32,
|
||||
num_heads=16,
|
||||
mlp_ratio=4,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ViT_huge_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ViT_huge_patch32_384(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = VisionTransformer(
|
||||
img_size=384,
|
||||
patch_size=32,
|
||||
embed_dim=1280,
|
||||
depth=32,
|
||||
num_heads=16,
|
||||
mlp_ratio=4,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ViT_huge_patch32_384"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
|
|
@ -1,130 +0,0 @@
|
|||
# global configs
|
||||
Global:
|
||||
checkpoints: null
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
save_interval: 1
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
epochs: 120
|
||||
print_batch_step: 10
|
||||
use_visualdl: False
|
||||
# used for static mode and model export
|
||||
image_shape: [3, 224, 224]
|
||||
save_inference_dir: ./inference
|
||||
|
||||
# model architecture
|
||||
Arch:
|
||||
name: ViT_huge_patch16_224
|
||||
class_num: 1000
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
Train:
|
||||
- CELoss:
|
||||
weight: 1.0
|
||||
Eval:
|
||||
- CELoss:
|
||||
weight: 1.0
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Momentum
|
||||
momentum: 0.9
|
||||
lr:
|
||||
name: Piecewise
|
||||
learning_rate: 0.1
|
||||
decay_epochs: [30, 60, 90]
|
||||
values: [0.1, 0.01, 0.001, 0.0001]
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
coeff: 0.0001
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
Train:
|
||||
dataset:
|
||||
name: ImageNetDataset
|
||||
image_root: ./dataset/ILSVRC2012/
|
||||
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||
transform_ops:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- RandCropImage:
|
||||
size: 224
|
||||
- RandFlipImage:
|
||||
flip_code: 1
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
order: ''
|
||||
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 64
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 4
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: ImageNetDataset
|
||||
image_root: ./dataset/ILSVRC2012/
|
||||
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||
transform_ops:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 256
|
||||
- CropImage:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
order: ''
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 64
|
||||
drop_last: False
|
||||
shuffle: False
|
||||
loader:
|
||||
num_workers: 4
|
||||
use_shared_memory: True
|
||||
|
||||
Infer:
|
||||
infer_imgs: docs/images/whl/demo.jpg
|
||||
batch_size: 10
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 256
|
||||
- CropImage:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
order: ''
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
||||
name: Topk
|
||||
topk: 5
|
||||
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||
|
||||
Metric:
|
||||
Train:
|
||||
- TopkAcc:
|
||||
topk: [1, 5]
|
||||
Eval:
|
||||
- TopkAcc:
|
||||
topk: [1, 5]
|
|
@ -1,130 +0,0 @@
|
|||
# global configs
|
||||
Global:
|
||||
checkpoints: null
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
save_interval: 1
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
epochs: 120
|
||||
print_batch_step: 10
|
||||
use_visualdl: False
|
||||
# used for static mode and model export
|
||||
image_shape: [3, 384, 384]
|
||||
save_inference_dir: ./inference
|
||||
|
||||
# model architecture
|
||||
Arch:
|
||||
name: ViT_huge_patch32_384
|
||||
class_num: 1000
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
Train:
|
||||
- CELoss:
|
||||
weight: 1.0
|
||||
Eval:
|
||||
- CELoss:
|
||||
weight: 1.0
|
||||
|
||||
|
||||
Optimizer:
|
||||
name: Momentum
|
||||
momentum: 0.9
|
||||
lr:
|
||||
name: Piecewise
|
||||
learning_rate: 0.1
|
||||
decay_epochs: [30, 60, 90]
|
||||
values: [0.1, 0.01, 0.001, 0.0001]
|
||||
regularizer:
|
||||
name: 'L2'
|
||||
coeff: 0.0001
|
||||
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
Train:
|
||||
dataset:
|
||||
name: ImageNetDataset
|
||||
image_root: ./dataset/ILSVRC2012/
|
||||
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||
transform_ops:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- RandCropImage:
|
||||
size: 384
|
||||
- RandFlipImage:
|
||||
flip_code: 1
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
order: ''
|
||||
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 64
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 4
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: ImageNetDataset
|
||||
image_root: ./dataset/ILSVRC2012/
|
||||
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||
transform_ops:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 384
|
||||
- CropImage:
|
||||
size: 384
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
order: ''
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 64
|
||||
drop_last: False
|
||||
shuffle: False
|
||||
loader:
|
||||
num_workers: 4
|
||||
use_shared_memory: True
|
||||
|
||||
Infer:
|
||||
infer_imgs: docs/images/whl/demo.jpg
|
||||
batch_size: 10
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 384
|
||||
- CropImage:
|
||||
size: 384
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
order: ''
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
||||
name: Topk
|
||||
topk: 5
|
||||
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||
|
||||
Metric:
|
||||
Train:
|
||||
- TopkAcc:
|
||||
topk: [1, 5]
|
||||
Eval:
|
||||
- TopkAcc:
|
||||
topk: [1, 5]
|
|
@ -1,52 +0,0 @@
|
|||
===========================train_params===========================
|
||||
model_name:ViT_huge_patch16_224
|
||||
python:python3.7
|
||||
gpu_list:0|0,1
|
||||
-o Global.device:gpu
|
||||
-o Global.auto_cast:null
|
||||
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
|
||||
-o Global.output_dir:./output/
|
||||
-o DataLoader.Train.sampler.batch_size:8
|
||||
-o Global.pretrained_model:null
|
||||
train_model_name:latest
|
||||
train_infer_img_dir:./dataset/ILSVRC2012/val
|
||||
null:null
|
||||
##
|
||||
trainer:norm_train
|
||||
norm_train:tools/train.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch16_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
|
||||
pact_train:null
|
||||
fpgm_train:null
|
||||
distill_train:null
|
||||
null:null
|
||||
null:null
|
||||
##
|
||||
===========================eval_params===========================
|
||||
eval:tools/eval.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch16_224.yaml
|
||||
null:null
|
||||
##
|
||||
===========================infer_params==========================
|
||||
-o Global.save_inference_dir:./inference
|
||||
-o Global.pretrained_model:
|
||||
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch16_224.yaml
|
||||
quant_export:null
|
||||
fpgm_export:null
|
||||
distill_export:null
|
||||
kl_quant:null
|
||||
export2:null
|
||||
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch16_224_pretrained.pdparams
|
||||
infer_model:../inference/
|
||||
infer_export:True
|
||||
infer_quant:Fasle
|
||||
inference:python/predict_cls.py -c configs/inference_cls.yaml
|
||||
-o Global.use_gpu:True|False
|
||||
-o Global.enable_mkldnn:True|False
|
||||
-o Global.cpu_num_threads:1|6
|
||||
-o Global.batch_size:1|16
|
||||
-o Global.use_tensorrt:True|False
|
||||
-o Global.use_fp16:True|False
|
||||
-o Global.inference_model_dir:../inference
|
||||
-o Global.infer_imgs:../dataset/ILSVRC2012/val
|
||||
-o Global.save_log_path:null
|
||||
-o Global.benchmark:True
|
||||
null:null
|
||||
null:null
|
|
@ -1,52 +0,0 @@
|
|||
===========================train_params===========================
|
||||
model_name:ViT_huge_patch32_384
|
||||
python:python3.7
|
||||
gpu_list:0|0,1
|
||||
-o Global.device:gpu
|
||||
-o Global.auto_cast:null
|
||||
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
|
||||
-o Global.output_dir:./output/
|
||||
-o DataLoader.Train.sampler.batch_size:8
|
||||
-o Global.pretrained_model:null
|
||||
train_model_name:latest
|
||||
train_infer_img_dir:./dataset/ILSVRC2012/val
|
||||
null:null
|
||||
##
|
||||
trainer:norm_train
|
||||
norm_train:tools/train.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch32_384.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
|
||||
pact_train:null
|
||||
fpgm_train:null
|
||||
distill_train:null
|
||||
null:null
|
||||
null:null
|
||||
##
|
||||
===========================eval_params===========================
|
||||
eval:tools/eval.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch32_384.yaml
|
||||
null:null
|
||||
##
|
||||
===========================infer_params==========================
|
||||
-o Global.save_inference_dir:./inference
|
||||
-o Global.pretrained_model:
|
||||
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/VisionTransformer/ViT_huge_patch32_384.yaml
|
||||
quant_export:null
|
||||
fpgm_export:null
|
||||
distill_export:null
|
||||
kl_quant:null
|
||||
export2:null
|
||||
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_huge_patch32_384_pretrained.pdparams
|
||||
infer_model:../inference/
|
||||
infer_export:True
|
||||
infer_quant:Fasle
|
||||
inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.resize_short=384 -o PreProcess.transform_ops.1.CropImage.size=384
|
||||
-o Global.use_gpu:True|False
|
||||
-o Global.enable_mkldnn:True|False
|
||||
-o Global.cpu_num_threads:1|6
|
||||
-o Global.batch_size:1|16
|
||||
-o Global.use_tensorrt:True|False
|
||||
-o Global.use_fp16:True|False
|
||||
-o Global.inference_model_dir:../inference
|
||||
-o Global.infer_imgs:../dataset/ILSVRC2012/val
|
||||
-o Global.save_log_path:null
|
||||
-o Global.benchmark:True
|
||||
null:null
|
||||
null:null
|
Loading…
Reference in New Issue