mirror of
https://github.com/PaddlePaddle/PaddleClas.git
synced 2025-06-03 21:55:06 +08:00
modify some default hyperparams to adapt to fine-tune downstream tasks (#2921)
1. unset EMA because of the relatively small size of most downstream dataset; 2. use mean and std of IMN.
This commit is contained in:
parent
1f8a830b58
commit
eddba911b1
@ -49,6 +49,20 @@ output = model(inputs) # the output of model embeding
|
||||
| EVA | EVA_vit_giant_patch14 | 1010M | 1408 | ImageNet-21k, CC12M, CC2M, Object365,COCO, ADE | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_giant_patch14.pdparams) |
|
||||
| CAE | CAE_vit_base_patch16_224 | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CAE_vit_base_patch16_224.pdparams) |
|
||||
|
||||
**备注:** PaddleClas 提供的 CLIP 系列模型在 ImageNet1k 数据集 fine-tune 的配置([CLIP_vit_base_patch14_224](ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml),[CLIP_vit_large_patch14_224](ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml))中:
|
||||
* 默认未使用 `EMA`,如需使用,请自行修改配置文件增加字段:
|
||||
```yaml
|
||||
EMA:
|
||||
decay: 0.9999
|
||||
```
|
||||
* 数据预处理中,`NormalizeImage` 默认使用 ImageNet1k 数据集的 `mean` 和 `std` 参数(`mean` 为 `[0.485, 0.456, 0.406]`,`std` 为 `[0.229, 0.224, 0.225]`),如需使用 LAION 数据集相应参数,请自行修改相应字段:
|
||||
```yaml
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.48145466, 0.4578275, 0.40821073]
|
||||
std: [0.26862954, 0.26130258, 0.27577711]
|
||||
```
|
||||
|
||||
## 4. 参考文献
|
||||
|
||||
1. [MoCo v3: An Empirical Study of Training Self-Supervised Vision Transformers](https://arxiv.org/pdf/2104.02057.pdf)
|
||||
|
@ -21,10 +21,6 @@ AMP:
|
||||
# O1: mixed fp16
|
||||
level: O1
|
||||
|
||||
# model ema
|
||||
EMA:
|
||||
decay: 0.9999
|
||||
|
||||
# model architecture
|
||||
Arch:
|
||||
name: CLIP_vit_base_patch16_224
|
||||
@ -81,8 +77,8 @@ DataLoader:
|
||||
img_size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.48145466, 0.4578275, 0.40821073]
|
||||
std: [0.26862954, 0.26130258, 0.27577711]
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
- RandomErasing:
|
||||
EPSILON: 0.25
|
||||
@ -118,8 +114,8 @@ DataLoader:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.48145466, 0.4578275, 0.40821073]
|
||||
std: [0.26862954, 0.26130258, 0.27577711]
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
@ -143,8 +139,8 @@ Infer:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.5, 0.5, 0.5]
|
||||
std: [0.5, 0.5, 0.5]
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
||||
|
157
ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml
Normal file
157
ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml
Normal file
@ -0,0 +1,157 @@
|
||||
# global configs
|
||||
Global:
|
||||
checkpoints: null
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
save_interval: 10
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
epochs: 50
|
||||
print_batch_step: 10
|
||||
use_visualdl: False
|
||||
# used for static mode and model export
|
||||
image_shape: [3, 224, 224]
|
||||
save_inference_dir: ./inference
|
||||
|
||||
# mixed precision training
|
||||
AMP:
|
||||
scale_loss: 128.0
|
||||
use_dynamic_loss_scaling: True
|
||||
# O1: mixed fp16
|
||||
level: O1
|
||||
|
||||
# model architecture
|
||||
Arch:
|
||||
name: CLIP_vit_large_patch14_224
|
||||
class_num: 1000
|
||||
return_embed: False
|
||||
pretrained: True
|
||||
head_init_scale: 0.001
|
||||
|
||||
# loss function config for traing/eval process
|
||||
Loss:
|
||||
Train:
|
||||
- CELoss:
|
||||
weight: 1.0
|
||||
epsilon: 0.1
|
||||
Eval:
|
||||
- CELoss:
|
||||
weight: 1.0
|
||||
|
||||
Optimizer:
|
||||
name: AdamWDL
|
||||
beta1: 0.9
|
||||
beta2: 0.999
|
||||
epsilon: 1e-8
|
||||
weight_decay: 0.05
|
||||
layerwise_decay: 0.6
|
||||
filter_bias_and_bn: True
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.0003
|
||||
eta_min: 1e-6
|
||||
warmup_epoch: 10
|
||||
warmup_start_lr: 1e-6
|
||||
|
||||
# data loader for train and eval
|
||||
DataLoader:
|
||||
Train:
|
||||
dataset:
|
||||
name: ImageNetDataset
|
||||
image_root: ./dataset/ILSVRC2012/
|
||||
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
|
||||
transform_ops:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- RandCropImage:
|
||||
size: 224
|
||||
interpolation: bicubic
|
||||
backend: pil
|
||||
- RandFlipImage:
|
||||
flip_code: 1
|
||||
- TimmAutoAugment:
|
||||
config_str: rand-m9-mstd0.5-inc1
|
||||
interpolation: bicubic
|
||||
img_size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
- RandomErasing:
|
||||
EPSILON: 0.25
|
||||
sl: 0.02
|
||||
sh: 1.0/3.0
|
||||
r1: 0.3
|
||||
attempt: 10
|
||||
use_log_aspect: True
|
||||
mode: pixel
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 128
|
||||
drop_last: True
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 16
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
dataset:
|
||||
name: ImageNetDataset
|
||||
image_root: ./dataset/ILSVRC2012/
|
||||
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
|
||||
transform_ops:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 224
|
||||
interpolation: bicubic
|
||||
backend: pil
|
||||
- CropImage:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 128
|
||||
drop_last: False
|
||||
shuffle: False
|
||||
loader:
|
||||
num_workers: 8
|
||||
use_shared_memory: True
|
||||
|
||||
Infer:
|
||||
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
|
||||
batch_size: 10
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 256
|
||||
- CropImage:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
||||
name: Topk
|
||||
topk: 5
|
||||
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
|
||||
|
||||
Metric:
|
||||
Train:
|
||||
- TopkAcc:
|
||||
topk: [1, 5]
|
||||
Eval:
|
||||
- TopkAcc:
|
||||
topk: [1, 5]
|
Loading…
x
Reference in New Issue
Block a user