From c46189bad04be7a971013085e2e1caa90cfadc9d Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Tue, 12 Apr 2022 06:56:44 +0000
Subject: [PATCH 01/14] fix: fix bug about calc loss in dist

---
 ppcls/engine/evaluation/classification.py | 100 +++++++++++-----------
 1 file changed, 51 insertions(+), 49 deletions(-)

diff --git a/ppcls/engine/evaluation/classification.py b/ppcls/engine/evaluation/classification.py
index e9836fcbb..446226fbf 100644
--- a/ppcls/engine/evaluation/classification.py
+++ b/ppcls/engine/evaluation/classification.py
@@ -66,68 +66,70 @@ def classification_eval(engine, epoch_id=0):
                     },
                     level=amp_level):
                 out = engine.model(batch[0])
-            # calc loss
-            if engine.eval_loss_func is not None:
-                loss_dict = engine.eval_loss_func(out, batch[1])
-                for key in loss_dict:
-                    if key not in output_info:
-                        output_info[key] = AverageMeter(key, '7.5f')
-                    output_info[key].update(loss_dict[key].numpy()[0],
-                                            batch_size)
         else:
             out = engine.model(batch[0])
-            # calc loss
-            if engine.eval_loss_func is not None:
-                loss_dict = engine.eval_loss_func(out, batch[1])
-                for key in loss_dict:
-                    if key not in output_info:
-                        output_info[key] = AverageMeter(key, '7.5f')
-                    output_info[key].update(loss_dict[key].numpy()[0],
-                                            batch_size)

         # just for DistributedBatchSampler issue: repeat sampling
         current_samples = batch_size * paddle.distributed.get_world_size()
         accum_samples += current_samples

-        # calc metric
-        if engine.eval_metric_func is not None:
-            if paddle.distributed.get_world_size() > 1:
-                label_list = []
-                paddle.distributed.all_gather(label_list, batch[1])
-                labels = paddle.concat(label_list, 0)
+        # gather Tensor when distributed
+        if paddle.distributed.get_world_size() > 1:
+            label_list = []
+            paddle.distributed.all_gather(label_list, batch[1])
+            labels = paddle.concat(label_list, 0)

-                if isinstance(out, dict):
-                    if "Student" in out:
-                        out = out["Student"]
-                        if isinstance(out, dict):
-                            out = out["logits"]
-                    elif "logits" in out:
+            if isinstance(out, dict):
+                if "Student" in out:
+                    out = out["Student"]
+                    if isinstance(out, dict):
                         out = out["logits"]
-                    else:
-                        msg = "Error: Wrong key in out!"
-                        raise Exception(msg)
-                if isinstance(out, list):
-                    pred = []
-                    for x in out:
-                        pred_list = []
-                        paddle.distributed.all_gather(pred_list, x)
-                        pred_x = paddle.concat(pred_list, 0)
-                        pred.append(pred_x)
+                elif "logits" in out:
+                    out = out["logits"]
                 else:
+                    msg = "Error: Wrong key in out!"
+                    raise Exception(msg)
+            if isinstance(out, list):
+                preds = []
+                for x in out:
                     pred_list = []
-                    paddle.distributed.all_gather(pred_list, out)
-                    pred = paddle.concat(pred_list, 0)
-
-                if accum_samples > total_samples and not engine.use_dali:
-                    pred = pred[:total_samples + current_samples -
-                                accum_samples]
-                    labels = labels[:total_samples + current_samples -
-                                    accum_samples]
-                    current_samples = total_samples + current_samples - accum_samples
-                metric_dict = engine.eval_metric_func(pred, labels)
+                    paddle.distributed.all_gather(pred_list, x)
+                    pred_x = paddle.concat(pred_list, 0)
+                    preds.append(pred_x)
             else:
-                metric_dict = engine.eval_metric_func(out, batch[1])
+                pred_list = []
+                paddle.distributed.all_gather(pred_list, out)
+                preds = paddle.concat(pred_list, 0)
+
+            if accum_samples > total_samples and not engine.use_dali:
+                preds = preds[:total_samples + current_samples - accum_samples]
+                labels = labels[:total_samples + current_samples -
+                                accum_samples]
+                current_samples = total_samples + current_samples - accum_samples
+        else:
+            labels = batch[1]
+            preds = out
+
+        # calc loss
+        if engine.eval_loss_func is not None:
+            if engine.amp and engine.config["AMP"].get("use_fp16_test", False):
+                amp_level = engine.config['AMP'].get("level", "O1").upper()
+                with paddle.amp.auto_cast(
+                        custom_black_list={
+                            "flatten_contiguous_range", "greater_than"
+                        },
+                        level=amp_level):
+                    loss_dict = engine.eval_loss_func(preds, labels)
+            else:
+                loss_dict = engine.eval_loss_func(preds, labels)
+
+            for key in loss_dict:
+                if key not in output_info:
+                    output_info[key] = AverageMeter(key, '7.5f')
+                output_info[key].update(loss_dict[key].numpy()[0], batch_size)
+        # calc metric
+        if engine.eval_metric_func is not None:
+            metric_dict = engine.eval_metric_func(preds, labels)
             for key in metric_dict:
                 if metric_key is None:
                     metric_key = key

From 474c918b279bcbe42d9c994e3cf90b20b9c23c9f Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Wed, 13 Apr 2022 09:19:30 +0000
Subject: [PATCH 02/14] fix: fix batch_size statistics error

---
 ppcls/engine/evaluation/classification.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ppcls/engine/evaluation/classification.py b/ppcls/engine/evaluation/classification.py
index 446226fbf..d7c697460 100644
--- a/ppcls/engine/evaluation/classification.py
+++ b/ppcls/engine/evaluation/classification.py
@@ -126,7 +126,8 @@ def classification_eval(engine, epoch_id=0):
             for key in loss_dict:
                 if key not in output_info:
                     output_info[key] = AverageMeter(key, '7.5f')
-                output_info[key].update(loss_dict[key].numpy()[0], batch_size)
+                output_info[key].update(loss_dict[key].numpy()[0],
+                                        current_samples)
         # calc metric
         if engine.eval_metric_func is not None:
             metric_dict = engine.eval_metric_func(preds, labels)

From 13d5e5905185dc2d42d890782823a0c91e691d23 Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Thu, 14 Apr 2022 07:36:39 +0000
Subject: [PATCH 03/14] fix: convert bn to sync_bn

The running_mean and running_var of bn are not synchronized in distributed
training, which leads to a bug: the eval loss computed during training is
inconsistent with the loss from an eval-only run.
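For readers unfamiliar with the Paddle API this patch relies on: `paddle.nn.SyncBatchNorm.convert_sync_batchnorm` recursively swaps every BatchNorm sublayer for a SyncBatchNorm layer, so running statistics are computed across all ranks. A minimal sketch of the conversion applied below (the toy model is illustrative, not taken from PaddleClas):

```python
import paddle.nn as nn

# Toy network containing a BatchNorm layer (illustrative only).
model = nn.Sequential(nn.Conv2D(3, 8, 3), nn.BatchNorm2D(8), nn.ReLU())

# Replace BatchNorm* sublayers with SyncBatchNorm so that running_mean and
# running_var are synchronized across ranks during distributed training/eval.
model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
```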
---
 ppcls/engine/engine.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py
index 7ab29d8d2..bc3a2a160 100644
--- a/ppcls/engine/engine.py
+++ b/ppcls/engine/engine.py
@@ -242,6 +242,11 @@ class Engine(object):
                 level=amp_level,
                 save_dtype='float32')

+        # TODO(gaotingquan): convert_sync_batchnorm is not effective
+        # eval loss in training is inconsistent with the eval only if bn is used,
+        # because the running_mean and running_var of bn are not synced in dist.
+        self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
+
         # for distributed
         world_size = dist.get_world_size()
         self.config["Global"]["distributed"] = world_size != 1

From a35cdd2aecca8803691a178e4f71e240b91ccd27 Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Thu, 14 Apr 2022 08:19:39 +0000
Subject: [PATCH 04/14] revert: sync_bn is too slow to use, and
 convert_sync_batchnorm() is not effective for BatchNorm

---
 ppcls/engine/engine.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py
index bc3a2a160..7ab29d8d2 100644
--- a/ppcls/engine/engine.py
+++ b/ppcls/engine/engine.py
@@ -242,11 +242,6 @@ class Engine(object):
                 level=amp_level,
                 save_dtype='float32')

-        # TODO(gaotingquan): convert_sync_batchnorm is not effective
-        # eval loss in training is inconsistent with the eval only if bn is used,
-        # because the running_mean and running_var of bn are not synced in dist.
-        self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
-
         # for distributed
         world_size = dist.get_world_size()
         self.config["Global"]["distributed"] = world_size != 1

From cf33390713f9a56548f50f6e1c90fb191a85a13a Mon Sep 17 00:00:00 2001
From: lubin10
Date: Fri, 22 Apr 2022 09:08:58 +0000
Subject: [PATCH 05/14] update README.md: models v1.0 -> v1.1

---
 deploy/lite_shitu/README.md | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/deploy/lite_shitu/README.md b/deploy/lite_shitu/README.md
index b0ec6f6b3..52871c3c1 100644
--- a/deploy/lite_shitu/README.md
+++ b/deploy/lite_shitu/README.md
@@ -92,9 +92,9 @@ PaddleClas provides converted and optimized inference models; you can refer directly to the following
 ```shell
 # enter the lite_shitu directory
 cd $PaddleClas/deploy/lite_shitu
-wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/lite/ppshitu_lite_models_v1.0.tar
-tar -xf ppshitu_lite_models_v1.0.tar
-rm -f ppshitu_lite_models_v1.0.tar
+wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/lite/ppshitu_lite_models_v1.1.tar
+tar -xf ppshitu_lite_models_v1.1.tar
+rm -f ppshitu_lite_models_v1.1.tar
 ```

 #### 2.1.2 Using other models
@@ -173,15 +173,11 @@ cp $code_path/PaddleDetection/inference/picodet_lcnet_x2_5_640_mainbody/mainbody
 2. Convert the recognition model

-Please first refer to [Converting a recognition model to a classification model](../../docs/zh_CN/advanced_tutorials/gallery2fc.md) to complete the conversion of the recognition model into a classification model.
-After obtaining the inference model (files with the `.pdmodel` and `.pdiparams` suffixes) and `label.txt`, use the PaddleLite opt tool to optimize the model with the following commands:
-
 ```shell
 # convert to a Paddle-Lite model
 paddle_lite_opt --model_file=inference/inference.pdmodel --param_file=inference/inference.pdiparams --optimize_out=inference/rec
-# copy the model and label file into lite_shitu
+# copy the model file into lite_shitu
 cp inference/rec.nb deploy/lite_shitu/models/
-cp inference/label.txt deploy/lite_shitu/models/
 cd deploy/lite_shitu
 ```

@@ -191,10 +187,10 @@
 ```shell
 # to test a single image
-python generate_json_config.py --det_model_path ppshitu_lite_models_v1.0/mainbody_PPLCNet_x2_5_640_quant_v1.0_lite.nb --rec_model_path ppshitu_lite_models_v1.0/general_PPLCNet_x2_5_lite_v1.0_infer.nb --img_path images/demo.jpg
+python generate_json_config.py --det_model_path ppshitu_lite_models_v1.1/mainbody_PPLCNet_x2_5_640_quant_v1.1_lite.nb --rec_model_path ppshitu_lite_models_v1.1/general_PPLCNet_x2_5_lite_v1.1_infer.nb --img_path images/demo.jpg
 # or
 # to test multiple images
-python generate_json_config.py --det_model_path ppshitu_lite_models_v1.0/mainbody_PPLCNet_x2_5_640_quant_v1.0_lite.nb --rec_model_path ppshitu_lite_models_v1.0/general_PPLCNet_x2_5_lite_v1.0_infer.nb --img_dir images
+python generate_json_config.py --det_model_path ppshitu_lite_models_v1.1/mainbody_PPLCNet_x2_5_640_quant_v1.1_lite.nb --rec_model_path ppshitu_lite_models_v1.1/general_PPLCNet_x2_5_lite_v1.1_infer.nb --img_dir images
 # when this finishes, a shitu_config.json config file is generated under lite_shitu
 ```

@@ -263,7 +259,7 @@ make ARM_ABI=arm8
 ```shell
 mkdir deploy
-mv ppshitu_lite_models_v1.0 deploy/
+mv ppshitu_lite_models_v1.1 deploy/
 mv drink_dataset_v1.0 deploy/
 mv images deploy/
 mv shitu_config.json deploy/
@@ -277,12 +273,12 @@ cp ../../../cxx/lib/libpaddle_light_api_shared.so deploy/
 ```shell
 deploy/
-|-- ppshitu_lite_models_v1.0/
-|   |--mainbody_PPLCNet_x2_5_lite_v1.0_infer.nb        the optimized mainbody detection model file
-|   |--general_PPLCNet_x2_5_quant_v1.0_lite.nb         the optimized recognition model file
+|-- ppshitu_lite_models_v1.1/
+|   |--mainbody_PPLCNet_x2_5_640_quant_v1.1_lite.nb    the optimized mainbody detection model file
+|   |--general_PPLCNet_x2_5_lite_v1.1_infer.nb         the optimized recognition model file
 |-- images/
 |   |--demo.jpg                                        image file
-|-- drink_dataset_v1.0/                            bottled-beverage demo data
+|-- drink_dataset_v1.0/                                bottled-beverage demo data
 |   |--index                                           retrieval index directory
 |-- pp_shitu                                           the generated mobile executable
 |-- shitu_config.json                                  runtime parameter configuration file

From b5a7eea504fea05bbab7d513526bab70de42d968 Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Fri, 22 Apr 2022 07:19:19 +0000
Subject: [PATCH 06/14] fix: fix the bug that DistributedBatchSampler may
 sample repeatedly

---
 ppcls/engine/evaluation/retrieval.py | 51 ++++++++++++++++------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/ppcls/engine/evaluation/retrieval.py b/ppcls/engine/evaluation/retrieval.py
index 8471a42c7..3dfe6337c 100644
--- a/ppcls/engine/evaluation/retrieval.py
+++ b/ppcls/engine/evaluation/retrieval.py
@@ -89,9 +89,6 @@ def retrieval_eval(engine, epoch_id=0):


 def cal_feature(engine, name='gallery'):
-    all_feas = None
-    all_image_id = None
-    all_unique_id = None
     has_unique_id = False

     if name == 'gallery':
@@ -103,6 +100,9 @@ def cal_feature(engine, name='gallery'):
     else:
         raise RuntimeError("Only support gallery or query dataset")

+    batch_feas_list = []
+    img_id_list = []
+    unique_id_list = []
     max_iter = len(dataloader) - 1 if platform.system() == "Windows" else len(
         dataloader)
     for idx, batch in enumerate(dataloader):  # load is very time-consuming
@@ -140,32 +140,39 @@ def cal_feature(engine, name='gallery'):
             if engine.config["Global"].get("feature_binarize") == "sign":
                 batch_feas = paddle.sign(batch_feas).astype("float32")

-            if all_feas is None:
-                all_feas = batch_feas
+            if paddle.distributed.get_world_size() > 1:
+                batch_feas_gather = []
+                img_id_gather = []
+                unique_id_gather = []
+                paddle.distributed.all_gather(batch_feas_gather, batch_feas)
+                paddle.distributed.all_gather(img_id_gather, batch[1])
+                batch_feas_list.append(paddle.concat(batch_feas_gather))
+                img_id_list.append(paddle.concat(img_id_gather))
                 if has_unique_id:
-                    all_unique_id = batch[2]
-                all_image_id = batch[1]
+                    paddle.distributed.all_gather(unique_id_gather, batch[2])
+                    unique_id_list.append(paddle.concat(unique_id_gather))
             else:
-                all_feas = paddle.concat([all_feas, batch_feas])
-                all_image_id = paddle.concat([all_image_id, batch[1]])
+                batch_feas_list.append(batch_feas)
+                img_id_list.append(batch[1])
                 if has_unique_id:
-                    all_unique_id = paddle.concat([all_unique_id, batch[2]])
+                    unique_id_list.append(batch[2])

     if engine.use_dali:
         dataloader.reset()

-    if paddle.distributed.get_world_size() > 1:
-        feat_list = []
-        img_id_list = []
-        unique_id_list = []
-        paddle.distributed.all_gather(feat_list, all_feas)
-        paddle.distributed.all_gather(img_id_list, all_image_id)
-        all_feas = paddle.concat(feat_list, axis=0)
-        all_image_id = paddle.concat(img_id_list, axis=0)
-        if has_unique_id:
-            paddle.distributed.all_gather(unique_id_list, all_unique_id)
-            all_unique_id = paddle.concat(unique_id_list, axis=0)
+    all_feas = paddle.concat(batch_feas_list)
+    all_img_id = paddle.concat(img_id_list)
+    if has_unique_id:
+        all_unique_id = paddle.concat(unique_id_list)
+
+    # just for DistributedBatchSampler issue: repeat sampling
+    total_samples = len(
+        dataloader.dataset) if not engine.use_dali else dataloader.size
+    all_feas = all_feas[:total_samples]
+    all_img_id = all_img_id[:total_samples]
+    if has_unique_id:
+        all_unique_id = all_unique_id[:total_samples]

     logger.info("Build {} done, all feat shape: {}, begin to eval..".format(
         name, all_feas.shape))
-    return all_feas, all_image_id, all_unique_id
+    return all_feas, all_img_id, all_unique_id

From b86ddf470eeb7579115feaff4ad6a0fcfa50b688 Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Thu, 21 Apr 2022 06:59:16 +0000
Subject: [PATCH 07/14] fix: when eval only, AMP can be enabled

---
 ppcls/engine/engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py
index 7a7bbde6b..b36aeb70c 100644
--- a/ppcls/engine/engine.py
+++ b/ppcls/engine/engine.py
@@ -98,8 +98,8 @@ class Engine(object):
         logger.info('train with paddle {} and device {}'.format(
             paddle.__version__, self.device))

-        # AMP training
-        self.amp = True if "AMP" in self.config and self.mode == "train" else False
+        # AMP training and evaluating
+        self.amp = "AMP" in self.config
         if self.amp and self.config["AMP"] is not None:
             self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
             self.use_dynamic_loss_scaling = self.config["AMP"].get(

From afe53ab30ca3bec7470b611563c0ee8d9d638277 Mon Sep 17 00:00:00 2001
From: cuicheng01
Date: Mon, 25 Apr 2022 05:57:19 +0000
Subject: [PATCH 08/14] fix typo in README

---
 README_ch.md | 2 +-
 README_en.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README_ch.md b/README_ch.md
index b3a0011e5..9219857fd 100644
--- a/README_ch.md
+++ b/README_ch.md
@@ -7,7 +7,7 @@ PaddleClas is an image recognition toolkit prepared by PaddlePaddle for industry and academia, helping users train better vision models and deploy them in real applications.

 **Recent updates**
-- 2022.4.21 Added the related [code](https://github.com/PaddlePaddle/PaddleClas/pull/1820/files) of the CVPR2022 oral paper [MixFormmer](https://arxiv.org/pdf/2204.02557.pdf).
+- 2022.4.21 Added the related [code](https://github.com/PaddlePaddle/PaddleClas/pull/1820/files) of the CVPR2022 oral paper [MixFormer](https://arxiv.org/pdf/2204.02557.pdf).
 - 2022.1.27 Fully upgraded the documentation; added the [PaddleServing C++ pipeline deployment](./deploy/paddleserving) and an [18M image recognition Android deployment demo](./deploy/lite_shitu).
 - 2021.11.1 Released the [PP-ShiTu technical report](https://arxiv.org/pdf/2111.00775.pdf) and added a beverage recognition demo.
 - 2021.10.23 Released PP-ShiTu, a lightweight image recognition system that can search a gallery of 100k+ images in 0.2 s on CPU.
diff --git a/README_en.md b/README_en.md
index 7ea40c63d..9b0d7c85d 100644
--- a/README_en.md
+++ b/README_en.md
@@ -8,7 +8,7 @@ PaddleClas is an image recognition toolset for industry and academia, helping us

 **Recent updates**

-- 2022.4.21 Added the related [code](https://github.com/PaddlePaddle/PaddleClas/pull/1820/files) of the CVPR2022 oral paper [MixFormmer](https://arxiv.org/pdf/2204.02557.pdf).
+- 2022.4.21 Added the related [code](https://github.com/PaddlePaddle/PaddleClas/pull/1820/files) of the CVPR2022 oral paper [MixFormer](https://arxiv.org/pdf/2204.02557.pdf).

 - 2021.09.17 Add PP-LCNet series model developed by PaddleClas, these models show strong competitiveness on Intel CPUs. For the introduction of PP-LCNet, please refer to [paper](https://arxiv.org/pdf/2109.15099.pdf) or [PP-LCNet model introduction](docs/en/models/PP-LCNet_en.md). The metrics and pretrained model are available [here](docs/en/ImageNet_models_en.md).

From fea9522a69958a5cb60e5b85beeb4c8caf979ca8 Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Tue, 26 Apr 2022 03:26:37 +0000
Subject: [PATCH 09/14] fix: dbg

---
 ppcls/engine/evaluation/retrieval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ppcls/engine/evaluation/retrieval.py b/ppcls/engine/evaluation/retrieval.py
index 3dfe6337c..b481efae1 100644
--- a/ppcls/engine/evaluation/retrieval.py
+++ b/ppcls/engine/evaluation/retrieval.py
@@ -90,6 +90,7 @@ def retrieval_eval(engine, epoch_id=0):

 def cal_feature(engine, name='gallery'):
     has_unique_id = False
+    all_unique_id = None

     if name == 'gallery':
         dataloader = engine.gallery_dataloader

From 645a125c4075ede496823b26d20d0fd25bfc5287 Mon Sep 17 00:00:00 2001
From: HydrogenSulfate <490868991@qq.com>
Date: Fri, 29 Apr 2022 13:34:56 +0800
Subject: [PATCH 10/14] refine paper and code ref, and remove trailing spaces

---
 ppcls/loss/deephashloss.py    |  3 +++
 ppcls/loss/googlenetloss.py   |  6 ++++--
 ppcls/loss/msmloss.py         | 10 ++++++----
 ppcls/loss/npairsloss.py      |  5 +++++
 ppcls/loss/pairwisecosface.py | 23 ++++++++++++++++-------
 ppcls/loss/rkdloss.py         |  2 ++
 ppcls/loss/supconloss.py      |  1 +
 ppcls/loss/trihardloss.py     |  8 +++++---
 ppcls/loss/triplet.py         | 16 ++++++++++++++++
 9 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/ppcls/loss/deephashloss.py b/ppcls/loss/deephashloss.py
index 959fd11ad..7dda519a8 100644
--- a/ppcls/loss/deephashloss.py
+++ b/ppcls/loss/deephashloss.py
@@ -20,6 +20,7 @@ class DSHSDLoss(nn.Layer):
     """
     # DSHSD(IEEE ACCESS 2019)
     # paper [Deep Supervised Hashing Based on Stable Distribution](https://ieeexplore.ieee.org/document/8648432/)
+    # code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/DSHSD.py
     """

     def __init__(self, alpha, multi_label=False):
@@ -62,6 +63,7 @@ class DSHSDLoss(nn.Layer):
 class LCDSHLoss(nn.Layer):
     """
     # paper [Locality-Constrained Deep Supervised Hashing for Image Retrieval](https://www.ijcai.org/Proceedings/2017/0499.pdf)
+    # code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/LCDSH.py
     """

     def __init__(self, n_class, _lambda):
@@ -100,6 +102,7 @@ class DCHLoss(paddle.nn.Layer):
     """
     # paper [Deep Cauchy Hashing for Hamming Space Retrieval] URL:(http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf)
+    # code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/DCH.py
     """

     def __init__(self, gamma, _lambda, n_class):
diff --git a/ppcls/loss/googlenetloss.py b/ppcls/loss/googlenetloss.py
index c580aa617..491311831 100644
--- a/ppcls/loss/googlenetloss.py
+++ b/ppcls/loss/googlenetloss.py
@@ -18,11 +18,13 @@ import paddle.nn.functional as F
 class GoogLeNetLoss(nn.Layer):
     """
     Cross entropy loss used after googlenet
+    reference paper: [https://arxiv.org/pdf/1409.4842v1.pdf](Going Deeper with Convolutions)
     """
+
     def __init__(self, epsilon=None):
         super().__init__()
-        assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "googlenet is not support label_smooth"
-
+        assert (epsilon is None or epsilon <= 0 or
+                epsilon >= 1), "googlenet is not support label_smooth"

     def forward(self, inputs, label):
         input0, input1, input2 = inputs
diff --git a/ppcls/loss/msmloss.py b/ppcls/loss/msmloss.py
index 3aa0dd8bf..adf03ef8e 100644
--- a/ppcls/loss/msmloss.py
+++ b/ppcls/loss/msmloss.py
@@ -21,10 +21,12 @@ from .comfunc import rerange_index

 class MSMLoss(paddle.nn.Layer):
     """
-    MSMLoss Loss, based on triplet loss. USE P * K samples.
+    paper : [Margin Sample Mining Loss: A Deep Learning Based Method for Person Re-identification](https://arxiv.org/pdf/1710.00478.pdf)
+    code reference: https://github.com/michuanhaohao/keras_reid/blob/master/reid_tripletcls.py
+    Margin Sample Mining Loss, based on triplet loss. USE P * K samples.
     the batch size is fixed. Batch_size = P * K; but the K may vary between batches.
          same label gather together
-
+
     supported_metrics = [
         'euclidean',
         'sqeuclidean',
@@ -41,7 +43,7 @@ class MSMLoss(paddle.nn.Layer):
         self.rerange_index = rerange_index(batch_size, samples_each_class)

     def forward(self, input, target=None):
-        #normalization
+        #normalization
         features = input["features"]
         features = self._nomalize(features)
         samples_each_class = self.samples_each_class
@@ -53,7 +55,7 @@ class MSMLoss(paddle.nn.Layer):
                                                    features, axis=0)
         similary_matrix = paddle.sum(paddle.square(diffs), axis=-1)

-        #rerange
+        #rerange
         tmp = paddle.reshape(similary_matrix, shape=[-1, 1])
         tmp = paddle.gather(tmp, index=rerange_index)
         similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size])
diff --git a/ppcls/loss/npairsloss.py b/ppcls/loss/npairsloss.py
index d4b359e88..131c799a4 100644
--- a/ppcls/loss/npairsloss.py
+++ b/ppcls/loss/npairsloss.py
@@ -5,6 +5,11 @@ import paddle


 class NpairsLoss(paddle.nn.Layer):
+    """Npair_loss_
+    paper [Improved deep metric learning with multi-class N-pair loss objective](https://dl.acm.org/doi/10.5555/3157096.3157304)
+    code reference: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib/losses/metric_learning/npairs_loss
+    """
+
     def __init__(self, reg_lambda=0.01):
         super(NpairsLoss, self).__init__()
         self.reg_lambda = reg_lambda
diff --git a/ppcls/loss/pairwisecosface.py b/ppcls/loss/pairwisecosface.py
index beb806863..7f146dea5 100644
--- a/ppcls/loss/pairwisecosface.py
+++ b/ppcls/loss/pairwisecosface.py
@@ -23,6 +23,11 @@ import paddle.nn.functional as F


 class PairwiseCosface(nn.Layer):
+    """
+    paper: Circle Loss: A Unified Perspective of Pair Similarity Optimization
+    code reference: https://github.com/leoluopy/circle-loss-demonstration/blob/main/circle_loss.py
+    """
+
     def __init__(self, margin, gamma):
         super(PairwiseCosface, self).__init__()
         self.margin = margin
@@ -36,8 +41,10 @@ class PairwiseCosface(nn.Layer):
         dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)

         N = dist_mat.shape[0]
-        is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
-        is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
+        is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
+            paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
+        is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
+            paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')

         # Mask scores related to itself
         is_pos = is_pos - paddle.eye(N, N)
@@ -46,10 +53,12 @@ class PairwiseCosface(nn.Layer):
         s_n = dist_mat * is_neg

         logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
-        logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)
+        logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg
+                                                                     )
+
+        loss = F.softplus(
+            paddle.logsumexp(
+                logit_p, axis=1) + paddle.logsumexp(
+                    logit_n, axis=1)).mean()

-        loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean()
-
         return {"PairwiseCosface": loss}
-
-
diff --git a/ppcls/loss/rkdloss.py b/ppcls/loss/rkdloss.py
index e6ffea273..aa6ae2324 100644
--- a/ppcls/loss/rkdloss.py
+++ b/ppcls/loss/rkdloss.py
@@ -29,6 +29,7 @@ def pdist(e, squared=False, eps=1e-12):


 class RKdAngle(nn.Layer):
+    # paper : [Relational Knowledge Distillation](https://arxiv.org/abs/1904.05068?context=cs.LG)
     # reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py
     def __init__(self, target_size=None):
         super().__init__()
@@ -64,6 +65,7 @@ class RKdAngle(nn.Layer):


 class RkdDistance(nn.Layer):
+    # paper : [Relational Knowledge Distillation](https://arxiv.org/abs/1904.05068?context=cs.LG)
     # reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py
     def __init__(self, eps=1e-12, target_size=1):
         super().__init__()
diff --git a/ppcls/loss/supconloss.py b/ppcls/loss/supconloss.py
index 3dd33bc19..753ceaf41 100644
--- a/ppcls/loss/supconloss.py
+++ b/ppcls/loss/supconloss.py
@@ -4,6 +4,7 @@ from paddle import nn

 class SupConLoss(nn.Layer):
     """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
+    code reference: https://github.com/HobbitLong/SupContrast/blob/master/losses.py
     It also supports the unsupervised contrastive loss in SimCLR"""

     def __init__(self,
diff --git a/ppcls/loss/trihardloss.py b/ppcls/loss/trihardloss.py
index 132c604d5..96cb42cb4 100644
--- a/ppcls/loss/trihardloss.py
+++ b/ppcls/loss/trihardloss.py
@@ -22,10 +22,12 @@ from .comfunc import rerange_index

 class TriHardLoss(paddle.nn.Layer):
     """
+    paper: In Defense of the Triplet Loss for Person Re-Identification
+    code reference: https://github.com/VisualComputingInstitute/triplet-reid/blob/master/loss.py
     TriHard Loss, based on triplet loss. USE P * K samples.
     the batch size is fixed. Batch_size = P * K; but the K may vary between batches.
          same label gather together
-
+
     supported_metrics = [
         'euclidean',
         'sqeuclidean',
@@ -45,7 +47,7 @@ class TriHardLoss(paddle.nn.Layer):
         features = input["features"]
         assert (self.batch_size == features.shape[0])

-        #normalization
+        #normalization
         features = self._nomalize(features)
         samples_each_class = self.samples_each_class
         rerange_index = paddle.to_tensor(self.rerange_index)
@@ -56,7 +58,7 @@ class TriHardLoss(paddle.nn.Layer):
                                                    features, axis=0)
         similary_matrix = paddle.sum(paddle.square(diffs), axis=-1)

-        #rerange
+        #rerange
         tmp = paddle.reshape(similary_matrix, shape=[-1, 1])
         tmp = paddle.gather(tmp, index=rerange_index)
         similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size])
diff --git a/ppcls/loss/triplet.py b/ppcls/loss/triplet.py
index d1c7eec9e..458ee2e27 100644
--- a/ppcls/loss/triplet.py
+++ b/ppcls/loss/triplet.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -8,6 +22,8 @@ import paddle.nn as nn

 class TripletLossV2(nn.Layer):
     """Triplet loss with hard positive/negative mining.
+    paper : [Facenet: A unified embedding for face recognition and clustering](https://arxiv.org/pdf/1503.03832.pdf)
+    code reference: https://github.com/okzhili/Cartoon-face-recognition/blob/master/loss/triplet_loss.py
     Args:
         margin (float): margin for triplet.
     """

From 92cdbe6fb9ae715373a12712a66d839436aed1d2 Mon Sep 17 00:00:00 2001
From: HydrogenSulfate <490868991@qq.com>
Date: Fri, 29 Apr 2022 13:53:21 +0800
Subject: [PATCH 11/14] add ref for emlloss

---
 ppcls/loss/emlloss.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ppcls/loss/emlloss.py b/ppcls/loss/emlloss.py
index 973570389..38b707fe1 100644
--- a/ppcls/loss/emlloss.py
+++ b/ppcls/loss/emlloss.py
@@ -23,6 +23,11 @@ from .comfunc import rerange_index


 class EmlLoss(paddle.nn.Layer):
+    """Ensemble Metric Learning Loss
+    paper: [Large Scale Strongly Supervised Ensemble Metric Learning, with Applications to Face Verification and Retrieval](https://arxiv.org/pdf/1212.6094.pdf)
+    code reference: https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/metric_learning/losses/emlloss.py
+    """
+
     def __init__(self, batch_size=40, samples_each_class=2):
         super(EmlLoss, self).__init__()
         assert (batch_size % samples_each_class == 0)

From 24372cc6e5deac02649bc1570314af4d047e3bcc Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Thu, 28 Apr 2022 14:19:01 +0000
Subject: [PATCH 12/14] update: update the default gpu num to 8 when using
 AdamW

---
 .../CSWinTransformer/CSWinTransformer_base_224.yaml |  7 ++++---
 .../CSWinTransformer/CSWinTransformer_base_384.yaml |  7 ++++---
 .../CSWinTransformer_large_224.yaml                 |  7 ++++---
 .../CSWinTransformer_large_384.yaml                 |  7 ++++---
 .../CSWinTransformer_small_224.yaml                 |  7 ++++---
 .../CSWinTransformer/CSWinTransformer_tiny_224.yaml |  7 ++++---
 .../DeiT/DeiT_base_distilled_patch16_224.yaml       |  7 ++++---
 .../DeiT/DeiT_base_distilled_patch16_384.yaml       |  7 ++++---
 .../ImageNet/DeiT/DeiT_base_patch16_224.yaml        |  7 ++++---
 .../ImageNet/DeiT/DeiT_base_patch16_384.yaml        |  7 ++++---
 .../DeiT/DeiT_small_distilled_patch16_224.yaml      |  6 +++---
 .../ImageNet/DeiT/DeiT_small_patch16_224.yaml       |  7 ++++---
 .../DeiT/DeiT_tiny_distilled_patch16_224.yaml       |  7 ++++---
 .../ImageNet/DeiT/DeiT_tiny_patch16_224.yaml        |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml         |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml         |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml         |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml  |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml         |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml         |  7 ++++---
 ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml         |  7 ++++---
 .../SwinTransformer_base_patch4_window12_384.yaml   |  7 ++++---
 .../SwinTransformer_base_patch4_window7_224.yaml    |  7 ++++---
 .../SwinTransformer_large_patch4_window12_384.yaml  |  7 ++++---
 .../SwinTransformer_large_patch4_window7_224.yaml   |  7 ++++---
 .../SwinTransformer_small_patch4_window7_224.yaml   |  7 ++++---
 .../SwinTransformer_tiny_patch4_window7_224.yaml    |  7 ++++---
 ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml      |  7 ++++---
 ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml     |  7 ++++---
 ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml     |  7 ++++---
 ppcls/configs/ImageNet/Twins/pcpvt_base.yaml        |  7 ++++---
 ppcls/configs/ImageNet/Twins/pcpvt_large.yaml       |  7 ++++---
 ppcls/configs/ImageNet/Twins/pcpvt_small.yaml       |  7 ++++---
 ppcls/engine/engine.py                              | 13 +++++++++----
 34 files changed, 140 insertions(+), 103 deletions(-)

diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml
index 4655e02b3..a7697840e 100644
--- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml
+++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml
@@ -42,11 +42,12 @@ Optimizer:
   no_weight_decay_name: pos_embed cls_token .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1.25e-4
-    eta_min: 1.25e-6
+    learning_rate: 2.5e-4
+    eta_min: 2.5e-6
     warmup_epoch: 20
-    warmup_start_lr: 1.25e-7
+    warmup_start_lr: 2.5e-7


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml
index 1e6b1f79f..a7100289c 100644
--- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml
+++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml
@@ -42,11 +42,12 @@ Optimizer:
   no_weight_decay_name: pos_embed cls_token .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 6.25e-5
-    eta_min: 6.25e-7
+    learning_rate: 1.25e-4
+    eta_min: 1.25e-6
     warmup_epoch: 20
-    warmup_start_lr: 6.25e-8
+    warmup_start_lr: 1.25e-7


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml
index ddeacadf0..7c96343df 100644
--- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml
+++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml
@@ -42,11 +42,12 @@ Optimizer:
   no_weight_decay_name: pos_embed cls_token .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1.25e-4
-    eta_min: 1.25e-6
+    learning_rate: 2.5e-4
+    eta_min: 2.5e-6
     warmup_epoch: 20
-    warmup_start_lr: 1.25e-7
+    warmup_start_lr: 2.5e-7


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml
index ab477ef2e..4b682fec6 100644
--- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml
+++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml
@@ -42,11 +42,12 @@ Optimizer:
   no_weight_decay_name: pos_embed cls_token .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 3.125e-5
-    eta_min: 3.125e-7
+    learning_rate: 6.25e-5
+    eta_min: 6.25e-7
     warmup_epoch: 20
-    warmup_start_lr: 3.125e-8
+    warmup_start_lr: 6.25e-8


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml
index ec3c5a145..a191f4160 100644
--- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml
+++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml
@@ -42,11 +42,12 @@ Optimizer:
   no_weight_decay_name: pos_embed cls_token .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 2.5e-4
-    eta_min: 2.5e-6
+    learning_rate: 5e-4
+    eta_min: 5e-6
     warmup_epoch: 20
-    warmup_start_lr: 2.5e-7
+    warmup_start_lr: 5e-7


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml
index 3e3f92525..3a2be2837 100644
--- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml
+++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml
@@ -42,11 +42,12 @@ Optimizer:
   no_weight_decay_name: pos_embed cls_token .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml
index 979a04a38..8c3cc4c34 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml
index 859f57d72..0b8c2e808 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml
index 3cdd10202..938916caa 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml
index 88a8fbae9..4cbe6ffde 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml
index 54d962e68..d5ba0cee7 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml
@@ -41,10 +41,10 @@ Optimizer:
   one_dim_param_no_weight_decay: True
   lr:
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml
index 05c3ac1f3..a167c896e 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml
index f66617613..319e17025 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml
index 647050a77..1234d79b6 100644
--- a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml
+++ b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml
@@ -40,11 +40,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token pos_embed dist_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 1e-3
-    eta_min: 1e-5
+    learning_rate: 2e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6

 # data loader for train and eval
 DataLoader:
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml
index 6c0854cb4..27fc20b99 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml
index 42134c740..20fa39773 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml
index 4d0d5a432..cda94496e 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml
index a5feb260b..2d48178f0 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml
index be300aca6..581a70605 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml
@@ -44,11 +44,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml
index b6a895339..92da84d1e 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml
@@ -44,11 +44,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml
index 9d36b2807..4bb2449a4 100644
--- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml
+++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml
@@ -44,11 +44,12 @@ Optimizer:
   no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 5e-6
+    learning_rate: 1e-3
+    eta_min: 1e-5
     warmup_epoch: 20
-    warmup_start_lr: 5e-7
+    warmup_start_lr: 1e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml
index 4dd0ac4cf..afc3fdcd2 100644
--- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml
+++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml
@@ -41,11 +41,12 @@ Optimizer:
   no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 20
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml
index a42dea1f9..4920fae6c 100644
--- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml
+++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml
@@ -41,11 +41,12 @@ Optimizer:
   no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 20
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml
index 36b5e5e38..a6dd74267 100644
--- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml
+++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml
@@ -41,11 +41,12 @@ Optimizer:
   no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 20
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml
index 96a9befd2..564da72f1 100644
--- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml
+++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml
@@ -41,11 +41,12 @@ Optimizer:
   no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 20
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml
index ffbbcf080..ba42f1efb 100644
--- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml
+++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml
@@ -41,11 +41,12 @@ Optimizer:
   no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 20
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml
index 066db715d..26fa0ba61 100644
--- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml
+++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml
@@ -41,11 +41,12 @@ Optimizer:
   no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 20
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml
index 74c402ee7..36e5b086d 100644
--- a/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml
+++ b/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml
index ca66e9a33..6e19d6461 100644
--- a/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml
+++ b/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml
index 9e97c0f99..66235960a 100644
--- a/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml
+++ b/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml
index 7831e9289..96745495a 100644
--- a/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml
+++ b/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml
index 8e160b3c2..ca4baf942 100644
--- a/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml
+++ b/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml
index 582382d4d..a5e5f7e05 100644
--- a/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml
+++ b/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml
@@ -43,11 +43,12 @@ Optimizer:
   no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
   one_dim_param_no_weight_decay: True
   lr:
+    # for 8 cards
     name: Cosine
-    learning_rate: 5e-4
-    eta_min: 1e-5
+    learning_rate: 1e-3
+    eta_min: 2e-5
     warmup_epoch: 5
-    warmup_start_lr: 1e-6
+    warmup_start_lr: 2e-6


 # data loader for train and eval
diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py
index b36aeb70c..aacde2f76 100644
--- a/ppcls/engine/engine.py
+++ b/ppcls/engine/engine.py
@@ -250,12 +250,17 @@ class Engine(object):
                 level=amp_level,
                 save_dtype='float32')

-        # for distributed
+        # check the gpu num
         world_size = dist.get_world_size()
         self.config["Global"]["distributed"] = world_size != 1
-        if world_size != 4 and self.mode == "train":
-            msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train."
-            logger.warning(msg)
+        if self.mode == "train":
+            std_gpu_num = 8 if self.config["Optimizer"][
+                "name"] == "AdamW" else 4
+            if world_size != std_gpu_num:
+                msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpus is {world_size} in current training. Please modify the strategy (learning rate, batch size and so on) if you use this config to train."
+                logger.warning(msg)
+
+        # for distributed
         if self.config["Global"]["distributed"]:
             dist.init_parallel_env()
             self.model = paddle.DataParallel(self.model)

From c7a6fdda1752c1547e7caaa23e6e8efe41702b50 Mon Sep 17 00:00:00 2001
From: gaotingquan
Date: Wed, 27 Apr 2022 14:10:29 +0000
Subject: [PATCH 13/14] fix

---
 .../CSWinTransformer_tiny_224_train_infer_python.txt |  4 ++--
 .../MobileViT/MobileViT_S_train_infer_python.txt     |  4 ++--
 .../PVTV2/PVT_V2_B2_Linear_train_infer_python.txt    |  2 +-
 test_tipc/prepare.sh                                 | 12 ++++++------
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
index 11b2f9dd9..03f5e3eed 100644
--- a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
+++ b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
@@ -13,14 +13,14 @@ train_infer_img_dir:./dataset/ILSVRC2012/val
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1
 pact_train:null
 fpgm_train:null
 distill_train:null
 null:null
 null:null
 ##
-===========================eval_params===========================
+===========================eval_params===========================
 eval:tools/eval.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml
 null:null
 ##
diff --git a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
index d69f26412..06fda8fe6 100644
--- a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
+++ b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
@@ -13,14 +13,14 @@ train_infer_img_dir:./dataset/ILSVRC2012/val
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1
 pact_train:null
 fpgm_train:null
 distill_train:null
 null:null
 null:null
 ##
-===========================eval_params===========================
+===========================eval_params===========================
 eval:tools/eval.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml
 null:null
 ##
diff --git a/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt b/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt
index b2aa7df69..f50107fea 100644
--- a/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt
+++ b/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/ILSVRC2012/val
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+norm_train:tools/train.py -c ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1
 pact_train:null
 fpgm_train:null
 distill_train:null
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index c5be87074..70040dc8b 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 FILENAME=$1

-# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer',
+# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer',
 #                 'whole_infer', 'klquant_whole_infer',
 #                 'cpp_infer', 'serving_infer', 'lite_infer']

@@ -67,9 +67,9 @@ if [ ${MODE} = "cpp_infer" ];then
         model_dir=${tar_name%.*}
         eval "tar xf ${tar_name}"
         eval "mv ${model_dir} ${cls_inference_model_dir}"
-
+
         eval "wget -nc $det_inference_url"
-        tar_name=$(func_get_url_file_name "$det_inference_url")
+        tar_name=$(func_get_url_file_name "$det_inference_url")
         model_dir=${tar_name%.*}
         eval "tar xf ${tar_name}"
         eval "mv ${model_dir} ${det_inference_model_dir}"
@@ -120,7 +120,7 @@ if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "lite_train_whole_infer"
     wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/whole_chain/whole_chain_little_train.tar
     tar xf whole_chain_little_train.tar
     ln -s whole_chain_little_train ILSVRC2012
-    cd ILSVRC2012
+    cd ILSVRC2012
     mv train.txt train_list.txt
     mv val.txt val_list.txt
     cp -r train/* val/
@@ -132,7 +132,7 @@ elif [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ];then
     wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/whole_chain/whole_chain_infer.tar
     tar xf whole_chain_infer.tar
     ln -s whole_chain_infer ILSVRC2012
-    cd ILSVRC2012
+    cd ILSVRC2012
     mv val.txt val_list.txt
     ln -s val_list.txt train_list.txt
    cd ../../
@@ -153,7 +153,7 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then
     wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/whole_chain/whole_chain_CIFAR100.tar
     tar xf whole_chain_CIFAR100.tar
     ln -s whole_chain_CIFAR100 ILSVRC2012
-    cd ILSVRC2012
+    cd ILSVRC2012
     mv train.txt train_list.txt
     mv test.txt val_list.txt
     cd ../../
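The learning-rate doubling applied across the AdamW configs in patch 12 above follows the linear scaling rule: when the number of cards (and hence the global batch size) doubles from 4 to 8, the learning rate, its cosine floor, and the warmup start value are all scaled by the same factor. A minimal sketch of the arithmetic (the helper name `scale_lr` is ours, for illustration only):

```python
def scale_lr(base_lr: float, base_gpus: int, gpus: int) -> float:
    """Linear scaling rule: lr grows in proportion to the global batch size."""
    return base_lr * gpus / base_gpus

# e.g. the Swin/Twins config diffs above: 4-card values -> 8-card values
assert scale_lr(5e-4, 4, 8) == 1e-3   # learning_rate
assert scale_lr(1e-5, 4, 8) == 2e-5   # eta_min
assert scale_lr(1e-6, 4, 8) == 2e-6   # warmup_start_lr
```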
From bb13f3c4f5c495b35bddcf2d1574033df8fe1b55 Mon Sep 17 00:00:00 2001
From: littletomatodonkey
Date: Thu, 5 May 2022 09:48:56 +0800
Subject: [PATCH 14/14] fix single card dist (#1889)

* fix single card logit

* fix distillation yaml files
---
 .../mv3_large_x1_0_distill_mv3_small_x1_0.yaml |  3 +--
 .../resnet34_distill_resnet18_afd.yaml         |  4 +---
 ppcls/engine/evaluation/classification.py      | 15 +++++----------
 ppcls/optimizer/__init__.py                    |  2 --
 4 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml
index a7265b066..b230f11cb 100644
--- a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml
+++ b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml
@@ -49,9 +49,8 @@ Loss:
         model_name_pairs:
         - ["Student", "Teacher"]
   Eval:
-    - DistillationGTCELoss:
+    - CELoss:
         weight: 1.0
-        model_names: ["Student"]


 Optimizer:
diff --git a/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml b/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml
index e5b8b7162..000cb9add 100644
--- a/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml
+++ b/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml
@@ -88,10 +88,8 @@ Loss:
         s_shapes: *s_shapes
         t_shapes: *t_shapes
   Eval:
-    - DistillationGTCELoss:
+    - CELoss:
         weight: 1.0
-        model_names: ["Student"]
-

 Optimizer:
   name: Momentum
diff --git a/ppcls/engine/evaluation/classification.py b/ppcls/engine/evaluation/classification.py
index 6e7fc1a76..f4c90a393 100644
--- a/ppcls/engine/evaluation/classification.py
+++ b/ppcls/engine/evaluation/classification.py
@@ -80,22 +80,17 @@ def classification_eval(engine, epoch_id=0):
         current_samples = batch_size * paddle.distributed.get_world_size()
         accum_samples += current_samples

+        if isinstance(out, dict) and "Student" in out:
+            out = out["Student"]
+        if isinstance(out, dict) and "logits" in out:
+            out = out["logits"]
+
         # gather Tensor when distributed
         if paddle.distributed.get_world_size() > 1:
             label_list = []
             paddle.distributed.all_gather(label_list, batch[1])
             labels = paddle.concat(label_list, 0)

-            if isinstance(out, dict):
-                if "Student" in out:
-                    out = out["Student"]
-                    if isinstance(out, dict):
-                        out = out["logits"]
-                elif "logits" in out:
-                    out = out["logits"]
-                else:
-                    msg = "Error: Wrong key in out!"
-                    raise Exception(msg)
             if isinstance(out, list):
                 preds = []
                 for x in out:
diff --git a/ppcls/optimizer/__init__.py b/ppcls/optimizer/__init__.py
index d27f1100e..44d7b5ac0 100644
--- a/ppcls/optimizer/__init__.py
+++ b/ppcls/optimizer/__init__.py
@@ -118,8 +118,6 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None):
             if hasattr(model_list[i], optim_scope):
                 optim_model.append(getattr(model_list[i], optim_scope))

-        assert len(optim_model) == 1, \
-            "Invalid optim model for optim scope({}), number of optim_model={}".format(optim_scope, len(optim_model))
         optim = getattr(optimizer, optim_name)(
             learning_rate=lr, grad_clip=grad_clip,
             **optim_cfg)(model_list=optim_model)
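A closing note on the theme that recurs through patches 01, 02, 06, and 14: `DistributedBatchSampler` pads the dataset so that every rank receives the same number of batches, so after `all_gather` the evaluation results can contain duplicated samples that must be trimmed back to the dataset size. A minimal sketch of the arithmetic behind the `accum_samples`/`total_samples` bookkeeping (all numbers are illustrative):

```python
import math

total_samples = 100   # len(dataloader.dataset)
world_size = 4        # paddle.distributed.get_world_size()
batch_size = 8        # per-card batch size

# Each rank runs the same number of steps, padded with repeated samples.
steps = math.ceil(total_samples / (world_size * batch_size))   # 4 steps
gathered = steps * world_size * batch_size                     # 128 samples

# After all_gather, only the first `total_samples` entries are real;
# this is why the eval loops slice preds/labels before computing metrics.
assert gathered - total_samples == 28  # duplicated samples that must be dropped
```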