diff --git a/ppcls/arch/__init__.py b/ppcls/arch/__init__.py index 0c45cf6fc..2d5e29db8 100644 --- a/ppcls/arch/__init__.py +++ b/ppcls/arch/__init__.py @@ -28,7 +28,6 @@ from ppcls.utils import logger from ppcls.utils.save_load import load_dygraph_pretrain from ppcls.arch.slim import prune_model, quantize_model - __all__ = ["build_model", "RecModel", "DistillationModel"] @@ -82,13 +81,11 @@ class RecModel(TheseusLayer): out["backbone"] = x if self.neck is not None: x = self.neck(x) + out["neck"] = x out["features"] = x if self.head is not None: y = self.head(x, label) - out["neck"] = x - else: - y = None - out["logits"] = y + out["logits"] = y return out diff --git a/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml b/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml index c8973b064..b6c45363b 100644 --- a/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml +++ b/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml @@ -1,5 +1,4 @@ # global configs -# global configs Global: checkpoints: null pretrained_model: null @@ -85,11 +84,6 @@ Loss: key: "logits" model_name_pairs: - ["Student", "Teacher"] - - DistillationDMLLoss: - weight: 1.0 - key: "logits" - model_name_pairs: - - ["Student", "Teacher"] Eval: - DistillationGTCELoss: weight: 1.0 diff --git a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml index e7147694c..d67704e09 100644 --- a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml +++ b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml @@ -57,7 +57,7 @@ Optimizer: momentum: 0.9 lr: name: Cosine - learning_rate: 1.3 + learning_rate: 0.65 warmup_epoch: 5 regularizer: name: 'L2' diff --git a/ppcls/loss/distillationloss.py b/ppcls/loss/distillationloss.py index ab6187f5a..0340234b9 100644 --- a/ppcls/loss/distillationloss.py +++ b/ppcls/loss/distillationloss.py @@ -69,7 +69,7 @@ class DistillationGTCELoss(CELoss): def forward(self, predicts, batch): loss_dict = dict() - for _, name in enumerate(self.model_names): + for name in self.model_names: out = predicts[name] if self.key is not None: out = out[self.key] diff --git a/ppcls/loss/dmlloss.py b/ppcls/loss/dmlloss.py index 16ea76467..48bf6c024 100644 --- a/ppcls/loss/dmlloss.py +++ b/ppcls/loss/dmlloss.py @@ -42,8 +42,8 @@ class DMLLoss(nn.Layer): def forward(self, x, target): if self.act is not None: - x = F.softmax(x) - target = F.softmax(target) + x = self.act(x) + target = self.act(target) loss = self._kldiv(x, target) + self._kldiv(target, x) loss = loss / 2 loss = paddle.mean(loss)