mirror of
https://github.com/PaddlePaddle/PaddleClas.git
synced 2025-06-03 21:55:06 +08:00
Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleClas into fix_cpp_serving_bug
This commit is contained in:
commit
248d19e558
@ -37,7 +37,7 @@ Res2Net200_vd预训练模型Top-1精度高达85.1%。
|
||||
* 您可以扫描下面的微信群二维码, 加入PaddleClas 微信交流群。获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
|
||||
|
||||
<div align="center">
|
||||
<img src="./docs/images/wx_group.png" width = "200" />
|
||||
<img src="https://user-images.githubusercontent.com/12560511/150500411-fdb27d17-0c50-4ac1-a484-fb4a9c2454b3.jpg" width = "200" />
|
||||
</div>
|
||||
|
||||
## 快速体验
|
||||
@ -71,7 +71,7 @@ PP-ShiTu图像识别快速体验:[点击这里](./docs/zh_CN/quick_start/quick
|
||||
- [模型导出](./docs/zh_CN/inference_deployment/export_model.md)
|
||||
- Python/C++ 预测引擎
|
||||
- [基于Python预测引擎预测推理](./docs/zh_CN/inference_deployment/python_deploy.md)
|
||||
- [基于C++预测引擎预测推理](./docs/zh_CN/inference_deployment/cpp_deploy.md)(当前只支持图像分类任务,图像识别更新中)
|
||||
- [基于C++分类预测引擎预测推理](./docs/zh_CN/inference_deployment/cpp_deploy.md)、[基于C++的PP-ShiTu预测引擎预测推理](deploy/cpp_shitu/readme.md)
|
||||
- 服务化部署
|
||||
- [Paddle Serving服务化部署(推荐)](./docs/zh_CN/inference_deployment/paddle_serving_deploy.md)
|
||||
- [Hub serving服务化部署](./docs/zh_CN/inference_deployment/paddle_hub_serving_deploy.md)
|
||||
|
@ -8,7 +8,7 @@ PaddleClas is an image recognition toolset for industry and academia, helping us
|
||||
|
||||
**Recent updates**
|
||||
|
||||
- 2021.09.17 Add PP-LCNet series model developed by PaddleClas, these models show strong competitiveness on Intel CPUs.
|
||||
- 2021.09.17 Add PP-LCNet series model developed by PaddleClas, these models show strong competitiveness on Intel CPUs.
|
||||
For the introduction of PP-LCNet, please refer to [paper](https://arxiv.org/pdf/2109.15099.pdf) or [PP-LCNet model introduction](docs/en/models/PP-LCNet_en.md). The metrics and pretrained model are available [here](docs/en/ImageNet_models_en.md).
|
||||
|
||||
- 2021.06.29 Add Swin-transformer series models; the highest top-1 accuracy on the ImageNet1k dataset reaches 87.2%, and training, evaluation and inference are all supported. Pretrained models can be downloaded [here](docs/en/models/models_intro_en.md).
|
||||
@ -41,7 +41,7 @@ Four sample solutions are provided, including product recognition, vehicle recog
|
||||
* You can also scan the QR code below to join the PaddleClas WeChat group to get more efficient answers to your questions and to communicate with developers from all walks of life. We look forward to hearing from you.
|
||||
|
||||
<div align="center">
|
||||
<img src="./docs/images/wx_group.png" width = "200" />
|
||||
<img src="https://user-images.githubusercontent.com/12560511/150500411-fdb27d17-0c50-4ac1-a484-fb4a9c2454b3.jpg" width = "200" />
|
||||
</div>
|
||||
|
||||
## Quick Start
|
||||
@ -68,7 +68,7 @@ Quick experience of image recognition:[Link](./docs/en/tutorials/quick_start_r
|
||||
- [Feature Learning](./docs/en/tutorials/getting_started_retrieval_en.md)
|
||||
- Inference Model Prediction
|
||||
- [Python Inference](./docs/en/inference.md)
|
||||
- [C++ Inference](./deploy/cpp/readme_en.md)(only support classification for now, recognition coming soon)
|
||||
- [C++ Classification Inference](./deploy/cpp/readme_en.md), [C++ PP-ShiTu Inference](deploy/cpp_shitu/readme_en.md)
|
||||
- Model Deploy (only support classification for now, recognition coming soon)
|
||||
- [Hub Serving Deployment](./deploy/hubserving/readme_en.md)
|
||||
- [Mobile Deployment](./deploy/lite/readme_en.md)
|
||||
|
@ -122,7 +122,7 @@ def override(dl, ks, v):
|
||||
if len(ks) == 1:
|
||||
# assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl))
|
||||
if not ks[0] in dl:
|
||||
logger.warning('A new filed ({}) detected!'.format(ks[0], dl))
|
||||
logger.warning('A new filed ({}) detected!'.format(ks[0]))
|
||||
dl[ks[0]] = str2num(v)
|
||||
else:
|
||||
override(dl[ks[0]], ks[1:], v)
|
||||
|
@ -187,7 +187,7 @@ print("The result returned by update_res(): ", res)
|
||||
output = net(pd_input)
|
||||
print("The output's keys of processed net: ", output.keys())
|
||||
# The output's keys of net: dict_keys(['output', 'blocks[0]', 'blocks[2]', 'blocks[4]', 'blocks[10]'])
|
||||
# 网络前向输出 output 为 dict 类型对象,其中,output["key"] 为网络最终输出,output["blocks[0]"] 等为网络中间层输出结果
|
||||
# 网络前向输出 output 为 dict 类型对象,其中,output["output"] 为网络最终输出,output["blocks[0]"] 等为网络中间层输出结果
|
||||
```
|
||||
|
||||
除了通过调用方法 `update_res()` 的方式之外,也同样可以在实例化网络对象时,通过指定参数 `return_patterns` 实现相同效果:
|
||||
|
@ -4,7 +4,6 @@ Global:
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
class_num: 1000
|
||||
save_interval: 1
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
@ -17,6 +16,7 @@ Global:
|
||||
# model architecture
|
||||
Arch:
|
||||
name: ESNet_x0_25
|
||||
class_num: 1000
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
|
@ -4,7 +4,6 @@ Global:
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
class_num: 1000
|
||||
save_interval: 1
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
@ -17,6 +16,7 @@ Global:
|
||||
# model architecture
|
||||
Arch:
|
||||
name: ESNet_x0_5
|
||||
class_num: 1000
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
|
@ -4,7 +4,6 @@ Global:
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
class_num: 1000
|
||||
save_interval: 1
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
@ -17,6 +16,7 @@ Global:
|
||||
# model architecture
|
||||
Arch:
|
||||
name: ESNet_x0_75
|
||||
class_num: 1000
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
|
@ -4,7 +4,6 @@ Global:
|
||||
pretrained_model: null
|
||||
output_dir: ./output/
|
||||
device: gpu
|
||||
class_num: 1000
|
||||
save_interval: 1
|
||||
eval_during_train: True
|
||||
eval_interval: 1
|
||||
@ -17,6 +16,7 @@ Global:
|
||||
# model architecture
|
||||
Arch:
|
||||
name: ESNet_x1_0
|
||||
class_num: 1000
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
|
@ -71,7 +71,7 @@ DataLoader:
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 4
|
||||
num_workers: 8
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
|
@ -69,7 +69,7 @@ DataLoader:
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 4
|
||||
num_workers: 8
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
|
@ -70,7 +70,7 @@ DataLoader:
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 4
|
||||
num_workers: 8
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
|
@ -22,7 +22,8 @@ Global:
|
||||
AMP:
|
||||
scale_loss: 128.0
|
||||
use_dynamic_loss_scaling: True
|
||||
use_pure_fp16: &use_pure_fp16 False
|
||||
# O1: mixed fp16
|
||||
level: O1
|
||||
|
||||
# model architecture
|
||||
Arch:
|
||||
@ -44,6 +45,7 @@ Loss:
|
||||
Optimizer:
|
||||
name: Momentum
|
||||
momentum: 0.9
|
||||
multi_precision: True
|
||||
lr:
|
||||
name: Piecewise
|
||||
learning_rate: 0.1
|
||||
@ -74,12 +76,11 @@ DataLoader:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
channel_num: *image_channel
|
||||
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
batch_size: 256
|
||||
batch_size: 64
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
@ -104,7 +105,6 @@ DataLoader:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
channel_num: *image_channel
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
@ -131,7 +131,6 @@ Infer:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
channel_num: *image_channel
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
@ -10,8 +10,8 @@ Global:
|
||||
epochs: 120
|
||||
print_batch_step: 10
|
||||
use_visualdl: False
|
||||
# used for static mode and model export
|
||||
image_channel: &image_channel 4
|
||||
# used for static mode and model export
|
||||
image_shape: [*image_channel, 224, 224]
|
||||
save_inference_dir: ./inference
|
||||
# training model under @to_static
|
||||
@ -22,7 +22,8 @@ Global:
|
||||
AMP:
|
||||
scale_loss: 128.0
|
||||
use_dynamic_loss_scaling: True
|
||||
use_pure_fp16: &use_pure_fp16 True
|
||||
# O2: pure fp16
|
||||
level: O2
|
||||
|
||||
# model architecture
|
||||
Arch:
|
||||
@ -43,7 +44,7 @@ Loss:
|
||||
Optimizer:
|
||||
name: Momentum
|
||||
momentum: 0.9
|
||||
multi_precision: *use_pure_fp16
|
||||
multi_precision: True
|
||||
lr:
|
||||
name: Piecewise
|
||||
learning_rate: 0.1
|
||||
@ -74,7 +75,7 @@ DataLoader:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
output_fp16: True
|
||||
channel_num: *image_channel
|
||||
|
||||
sampler:
|
||||
@ -104,7 +105,7 @@ DataLoader:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
output_fp16: True
|
||||
channel_num: *image_channel
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
@ -131,7 +132,7 @@ Infer:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
output_fp16: True
|
||||
channel_num: *image_channel
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
@ -20,6 +20,7 @@ Arch:
|
||||
name: SE_ResNeXt101_32x4d
|
||||
class_num: 1000
|
||||
input_image_channel: *image_channel
|
||||
data_format: "NHWC"
|
||||
|
||||
# loss function config for training/eval process
|
||||
Loss:
|
||||
@ -35,11 +36,13 @@ Loss:
|
||||
AMP:
|
||||
scale_loss: 128.0
|
||||
use_dynamic_loss_scaling: True
|
||||
use_pure_fp16: &use_pure_fp16 True
|
||||
# O2: pure fp16
|
||||
level: O2
|
||||
|
||||
Optimizer:
|
||||
name: Momentum
|
||||
momentum: 0.9
|
||||
multi_precision: True
|
||||
lr:
|
||||
name: Cosine
|
||||
learning_rate: 0.1
|
||||
@ -67,7 +70,7 @@ DataLoader:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
output_fp16: True
|
||||
channel_num: *image_channel
|
||||
sampler:
|
||||
name: DistributedBatchSampler
|
||||
@ -96,7 +99,7 @@ DataLoader:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
output_fp16: True
|
||||
channel_num: *image_channel
|
||||
sampler:
|
||||
name: BatchSampler
|
||||
@ -123,7 +126,7 @@ Infer:
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
output_fp16: True
|
||||
channel_num: *image_channel
|
||||
- ToCHWImage:
|
||||
PostProcess:
|
@ -68,7 +68,7 @@ DataLoader:
|
||||
drop_last: False
|
||||
shuffle: True
|
||||
loader:
|
||||
num_workers: 4
|
||||
num_workers: 8
|
||||
use_shared_memory: True
|
||||
|
||||
Eval:
|
||||
|
@ -84,7 +84,8 @@ class Engine(object):
|
||||
|
||||
# for visualdl
|
||||
self.vdl_writer = None
|
||||
if self.config['Global']['use_visualdl'] and mode == "train":
|
||||
if self.config['Global'][
|
||||
'use_visualdl'] and mode == "train" and dist.get_rank() == 0:
|
||||
vdl_writer_path = os.path.join(self.output_dir, "vdl")
|
||||
if not os.path.exists(vdl_writer_path):
|
||||
os.makedirs(vdl_writer_path)
|
||||
@ -97,7 +98,7 @@ class Engine(object):
|
||||
paddle.__version__, self.device))
|
||||
|
||||
# AMP training
|
||||
self.amp = True if "AMP" in self.config else False
|
||||
self.amp = True if "AMP" in self.config and self.mode == "train" else False
|
||||
if self.amp and self.config["AMP"] is not None:
|
||||
self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
|
||||
self.use_dynamic_loss_scaling = self.config["AMP"].get(
|
||||
@ -112,6 +113,14 @@ class Engine(object):
|
||||
}
|
||||
paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
|
||||
|
||||
if "class_num" in config["Global"]:
|
||||
global_class_num = config["Global"]["class_num"]
|
||||
if "class_num" not in config["Arch"]:
|
||||
config["Arch"]["class_num"] = global_class_num
|
||||
msg = f"The Global.class_num will be deprecated. Please use Arch.class_num instead. Arch.class_num has been set to {global_class_num}."
|
||||
else:
|
||||
msg = "The Global.class_num will be deprecated. Please use Arch.class_num instead. The Global.class_num has been ignored."
|
||||
logger.warning(msg)
|
||||
#TODO(gaotingquan): support rec
|
||||
class_num = config["Arch"].get("class_num", None)
|
||||
self.config["DataLoader"].update({"class_num": class_num})
|
||||
@ -211,21 +220,32 @@ class Engine(object):
|
||||
self.optimizer, self.lr_sch = build_optimizer(
|
||||
self.config["Optimizer"], self.config["Global"]["epochs"],
|
||||
len(self.train_dataloader), [self.model])
|
||||
|
||||
|
||||
# for amp training
|
||||
if self.amp:
|
||||
self.scaler = paddle.amp.GradScaler(
|
||||
init_loss_scaling=self.scale_loss,
|
||||
use_dynamic_loss_scaling=self.use_dynamic_loss_scaling)
|
||||
if self.config['AMP']['use_pure_fp16'] is True:
|
||||
self.model = paddle.amp.decorate(models=self.model, level='O2', save_dtype='float32')
|
||||
amp_level = self.config['AMP'].get("level", "O1")
|
||||
if amp_level not in ["O1", "O2"]:
|
||||
msg = "[Parameter Error]: The optimize level of AMP only support 'O1' and 'O2'. The level has been set 'O1'."
|
||||
logger.warning(msg)
|
||||
self.config['AMP']["level"] = "O1"
|
||||
amp_level = "O1"
|
||||
self.model, self.optimizer = paddle.amp.decorate(
|
||||
models=self.model,
|
||||
optimizers=self.optimizer,
|
||||
level=amp_level,
|
||||
save_dtype='float32')
|
||||
|
||||
# for distributed
|
||||
self.config["Global"][
|
||||
"distributed"] = paddle.distributed.get_world_size() != 1
|
||||
world_size = dist.get_world_size()
|
||||
self.config["Global"]["distributed"] = world_size != 1
|
||||
if world_size != 4 and self.mode == "train":
|
||||
msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train."
|
||||
logger.warning(msg)
|
||||
if self.config["Global"]["distributed"]:
|
||||
dist.init_parallel_env()
|
||||
if self.config["Global"]["distributed"]:
|
||||
self.model = paddle.DataParallel(self.model)
|
||||
|
||||
# build postprocess for infer
|
||||
@ -336,8 +356,8 @@ class Engine(object):
|
||||
@paddle.no_grad()
|
||||
def infer(self):
|
||||
assert self.mode == "infer" and self.eval_mode == "classification"
|
||||
total_trainer = paddle.distributed.get_world_size()
|
||||
local_rank = paddle.distributed.get_rank()
|
||||
total_trainer = dist.get_world_size()
|
||||
local_rank = dist.get_rank()
|
||||
image_list = get_image_list(self.config["Infer"]["infer_imgs"])
|
||||
# data split
|
||||
image_list = image_list[local_rank::total_trainer]
|
||||
|
@ -56,13 +56,15 @@ def classification_eval(engine, epoch_id=0):
|
||||
batch[0] = paddle.to_tensor(batch[0]).astype("float32")
|
||||
if not engine.config["Global"].get("use_multilabel", False):
|
||||
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
|
||||
|
||||
|
||||
# image input
|
||||
if engine.amp:
|
||||
amp_level = 'O1'
|
||||
if engine.config['AMP']['use_pure_fp16'] is True:
|
||||
amp_level = 'O2'
|
||||
with paddle.amp.auto_cast(custom_black_list={"flatten_contiguous_range", "greater_than"}, level=amp_level):
|
||||
amp_level = engine.config['AMP'].get("level", "O1").upper()
|
||||
with paddle.amp.auto_cast(
|
||||
custom_black_list={
|
||||
"flatten_contiguous_range", "greater_than"
|
||||
},
|
||||
level=amp_level):
|
||||
out = engine.model(batch[0])
|
||||
# calc loss
|
||||
if engine.eval_loss_func is not None:
|
||||
@ -70,7 +72,8 @@ def classification_eval(engine, epoch_id=0):
|
||||
for key in loss_dict:
|
||||
if key not in output_info:
|
||||
output_info[key] = AverageMeter(key, '7.5f')
|
||||
output_info[key].update(loss_dict[key].numpy()[0], batch_size)
|
||||
output_info[key].update(loss_dict[key].numpy()[0],
|
||||
batch_size)
|
||||
else:
|
||||
out = engine.model(batch[0])
|
||||
# calc loss
|
||||
@ -79,7 +82,8 @@ def classification_eval(engine, epoch_id=0):
|
||||
for key in loss_dict:
|
||||
if key not in output_info:
|
||||
output_info[key] = AverageMeter(key, '7.5f')
|
||||
output_info[key].update(loss_dict[key].numpy()[0], batch_size)
|
||||
output_info[key].update(loss_dict[key].numpy()[0],
|
||||
batch_size)
|
||||
|
||||
# just for DistributedBatchSampler issue: repeat sampling
|
||||
current_samples = batch_size * paddle.distributed.get_world_size()
|
||||
|
@ -42,10 +42,12 @@ def train_epoch(engine, epoch_id, print_batch_step):
|
||||
|
||||
# image input
|
||||
if engine.amp:
|
||||
amp_level = 'O1'
|
||||
if engine.config['AMP']['use_pure_fp16'] is True:
|
||||
amp_level = 'O2'
|
||||
with paddle.amp.auto_cast(custom_black_list={"flatten_contiguous_range", "greater_than"}, level=amp_level):
|
||||
amp_level = engine.config['AMP'].get("level", "O1").upper()
|
||||
with paddle.amp.auto_cast(
|
||||
custom_black_list={
|
||||
"flatten_contiguous_range", "greater_than"
|
||||
},
|
||||
level=amp_level):
|
||||
out = forward(engine, batch)
|
||||
loss_dict = engine.train_loss_func(out, batch[1])
|
||||
else:
|
||||
|
@ -158,7 +158,7 @@ def create_strategy(config):
|
||||
exec_strategy.num_threads = 1
|
||||
exec_strategy.num_iteration_per_drop_scope = (
|
||||
10000
|
||||
if 'AMP' in config and config.AMP.get("use_pure_fp16", False) else 10)
|
||||
if 'AMP' in config and config.AMP.get("level", "O1") == "O2" else 10)
|
||||
|
||||
fuse_op = True if 'AMP' in config else False
|
||||
|
||||
@ -206,7 +206,7 @@ def mixed_precision_optimizer(config, optimizer):
|
||||
scale_loss = amp_cfg.get('scale_loss', 1.0)
|
||||
use_dynamic_loss_scaling = amp_cfg.get('use_dynamic_loss_scaling',
|
||||
False)
|
||||
use_pure_fp16 = amp_cfg.get('use_pure_fp16', False)
|
||||
use_pure_fp16 = amp_cfg.get("level", "O1") == "O2"
|
||||
optimizer = paddle.static.amp.decorate(
|
||||
optimizer,
|
||||
init_loss_scaling=scale_loss,
|
||||
|
@ -1,11 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
||||
export FLAGS_fraction_of_gpu_memory_to_use=0.80
|
||||
export CUDA_VISIBLE_DEVICES="0,1,2,3"
|
||||
|
||||
python3.7 -m paddle.distributed.launch \
|
||||
--gpus="0,1,2,3,4,5,6,7" \
|
||||
--gpus="0,1,2,3" \
|
||||
ppcls/static/train.py \
|
||||
-c ./ppcls/configs/ImageNet/ResNet/ResNet50_fp16.yaml \
|
||||
-o Global.use_dali=True
|
||||
|
||||
-c ./ppcls/configs/ImageNet/ResNet/ResNet50_amp_O1.yaml
|
||||
|
@ -158,7 +158,7 @@ def main(args):
|
||||
# load pretrained models or checkpoints
|
||||
init_model(global_config, train_prog, exe)
|
||||
|
||||
if 'AMP' in config and config.AMP.get("use_pure_fp16", False):
|
||||
if 'AMP' in config and config.AMP.get("level", "O1") == "O2":
|
||||
optimizer.amp_init(
|
||||
device,
|
||||
scope=paddle.static.global_scope(),
|
||||
|
@ -0,0 +1,22 @@
|
||||
===========================train_params===========================
|
||||
model_name:ResNet50_vd
|
||||
python:python3.7
|
||||
gpu_list:0|0,1
|
||||
-o Global.device:gpu
|
||||
-o Global.auto_cast:null|amp
|
||||
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
|
||||
-o Global.output_dir:./output/
|
||||
-o DataLoader.Train.sampler.batch_size:8
|
||||
-o Global.pretrained_model:null
|
||||
train_model_name:latest
|
||||
train_infer_img_dir:./dataset/ILSVRC2012/val
|
||||
null:null
|
||||
##
|
||||
trainer:norm_train
|
||||
norm_train:tools/train.py -c ppcls/configs/ImageNet/ResNet/ResNet50_vd.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.use_dali=True
|
||||
pact_train:null
|
||||
fpgm_train:null
|
||||
distill_train:null
|
||||
null:null
|
||||
null:null
|
||||
##
|
@ -108,6 +108,11 @@ if [[ $FILENAME == *GeneralRecognition* ]];then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ $FILENAME == *use_dali* ]];then
|
||||
python_name=$(func_parser_value "${lines[2]}")
|
||||
${python_name} -m pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/nightly --upgrade nvidia-dali-nightly-cuda102
|
||||
fi
|
||||
|
||||
if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "lite_train_whole_infer" ];then
|
||||
# pretrain lite train data
|
||||
cd dataset
|
||||
|
Loading…
x
Reference in New Issue
Block a user