Merge pull request #1390 from Intsigstephon/feature_binary_model

add Binary general recog configure
Authored by cuicheng01 on 2021-11-12 15:25:00 +08:00; committed by GitHub
commit 6e4bf593fb
6 changed files with 199 additions and 17 deletions


@@ -62,6 +62,7 @@ from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet3
 from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53
 from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
 from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid
+from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh


 def get_apis():


@@ -1,2 +1,3 @@
 from .resnet_variant import ResNet50_last_stage_stride1
 from .vgg_variant import VGG19Sigmoid
+from .pp_lcnet_variant import PPLCNet_x2_5_Tanh


@@ -0,0 +1,29 @@
import paddle
from paddle.nn import Sigmoid
from paddle.nn import Tanh
from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x2_5

__all__ = ["PPLCNet_x2_5_Tanh"]


class TanhSuffix(paddle.nn.Layer):
    def __init__(self, origin_layer):
        super(TanhSuffix, self).__init__()
        self.origin_layer = origin_layer
        self.tanh = Tanh()

    def forward(self, input, res_dict=None, **kwargs):
        x = self.origin_layer(input)
        x = self.tanh(x)
        return x


def PPLCNet_x2_5_Tanh(pretrained=False, use_ssld=False, **kwargs):
    def replace_function(origin_layer):
        new_layer = TanhSuffix(origin_layer)
        return new_layer

    match_re = "linear_0"
    model = PPLCNet_x2_5(pretrained=pretrained, use_ssld=use_ssld, **kwargs)
    model.replace_sub(match_re, replace_function, True)
    return model
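
For context (not part of the diff): PPLCNet_x2_5_Tanh wraps the backbone's final linear layer ("linear_0") in a Tanh, so the network emits embeddings bounded in (-1, 1) that can later be binarized. A minimal usage sketch, assuming PPLCNet_x2_5 accepts the class_num keyword used in the config below:

# Hedged sketch: build the Tanh-suffixed backbone and inspect the embedding range.
import paddle
from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh

model = PPLCNet_x2_5_Tanh(pretrained=False, class_num=512)  # 512-d output, no weight download
x = paddle.rand([1, 3, 224, 224])                           # matches Global.image_shape
feat = model(x)                                             # shape [1, 512]
print(float(feat.min()), float(feat.max()))                 # values lie in (-1, 1) after Tanh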


@@ -0,0 +1,147 @@
# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  eval_during_train: True
  eval_interval: 1
  epochs: 100
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference
  eval_mode: retrieval
  use_dali: False
  to_static: False
  # feature postprocess
  feature_normalize: False
  feature_binarize: "sign"

# model architecture
Arch:
  name: RecModel
  infer_output_key: features
  infer_add_softmax: False
  Backbone:
    name: PPLCNet_x2_5_Tanh
    pretrained: True
    use_ssld: True
    class_num: 512
  Head:
    name: FC
    embedding_size: 512
    class_num: 185341

# loss function config for training/eval process
Loss:
  Train:
    - DSHSDLoss:
        weight: 1.0
        alpha: 0.1
  Eval:
    - DSHSDLoss:
        weight: 1.0
        alpha: 0.1

Optimizer:
  name: Momentum
  momentum: 0.9
  lr:
    name: Cosine
    learning_rate: 0.04
    warmup_epoch: 5
  regularizer:
    name: 'L2'
    coeff: 0.00001

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/all_data
      cls_label_path: ./dataset/all_data/train_reg_all_data.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
        - RandFlipImage:
            flip_code: 1
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 256
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

  Eval:
    Query:
      dataset:
        name: VeriWild
        image_root: ./dataset/Aliproduct/
        cls_label_path: ./dataset/Aliproduct/val_list.txt
        transform_ops:
          - DecodeImage:
              to_rgb: True
              channel_first: False
          - ResizeImage:
              size: 224
          - NormalizeImage:
              scale: 0.00392157
              mean: [0.485, 0.456, 0.406]
              std: [0.229, 0.224, 0.225]
              order: ''
      sampler:
        name: DistributedBatchSampler
        batch_size: 64
        drop_last: False
        shuffle: False
      loader:
        num_workers: 4
        use_shared_memory: True

    Gallery:
      dataset:
        name: VeriWild
        image_root: ./dataset/Aliproduct/
        cls_label_path: ./dataset/Aliproduct/val_list.txt
        transform_ops:
          - DecodeImage:
              to_rgb: True
              channel_first: False
          - ResizeImage:
              size: 224
          - NormalizeImage:
              scale: 0.00392157
              mean: [0.485, 0.456, 0.406]
              std: [0.229, 0.224, 0.225]
              order: ''
      sampler:
        name: DistributedBatchSampler
        batch_size: 64
        drop_last: False
        shuffle: False
      loader:
        num_workers: 4
        use_shared_memory: True

Metric:
  Eval:
    - Recallk:
        topk: [1, 5]
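
A hedged reading of the feature post-processing switches above (the evaluator code is not part of this diff): with feature_normalize: False and feature_binarize: "sign", retrieval evaluation is expected to skip L2 normalization and turn each Tanh embedding into a hash code by taking its sign, roughly:

# Hedged sketch of "sign" binarization; exact zeros stay 0, everything else maps to ±1.
import paddle

def binarize_features(features):
    return paddle.sign(features)

codes = binarize_features(paddle.to_tensor([[0.7, -0.2, 0.01, -0.9]]))
print(codes.numpy())  # [[ 1. -1.  1. -1.]]

Training with this file would go through PaddleClas's usual entry point, e.g. python3 tools/train.py -c <path-to-this-config>.yaml, where the config's on-disk path is a placeholder since it is not shown in this view.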


@@ -22,6 +22,8 @@ from .distillationloss import DistillationGTCELoss
 from .distillationloss import DistillationDMLLoss
 from .multilabelloss import MultiLabelLoss
+from .deephashloss import DSHSDLoss, LCDSHLoss

 class CombinedLoss(nn.Layer):
     def __init__(self, config_list):
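
With DSHSDLoss and LCDSHLoss exported here, the Loss section of the config above can be resolved by CombinedLoss. A hedged sketch of that mapping, assuming the usual PaddleClas convention that weight is split off as the loss weight and the remaining keys become constructor arguments:

# Hedged sketch: how the config's Loss.Train entry would be instantiated.
from ppcls.loss import CombinedLoss

loss_fn = CombinedLoss([{"DSHSDLoss": {"weight": 1.0, "alpha": 0.1}}])
# roughly equivalent to building DSHSDLoss(alpha=0.1) and weighting its output by 1.0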


@@ -23,40 +23,42 @@ class DSHSDLoss(nn.Layer):
     # [DSHSD] epoch:250, bit:48, dataset:nuswide_21, MAP:0.809, Best MAP: 0.815
     # [DSHSD] epoch:135, bit:48, dataset:imagenet, MAP:0.647, Best MAP: 0.647
     """
-    def __init__(self, n_class, bit, alpha, multi_label=False):
+    def __init__(self, alpha, multi_label=False):
         super(DSHSDLoss, self).__init__()
-        self.m = 2 * bit
         self.alpha = alpha
         self.multi_label = multi_label
-        self.n_class = n_class
-        self.fc = paddle.nn.Linear(bit, n_class, bias_attr=False)

     def forward(self, input, label):
         feature = input["features"]
-        feature = feature.tanh().astype("float32")
+        logits = input["logits"]

-        dist = paddle.sum(paddle.square(
-            (paddle.unsqueeze(feature, 1) - paddle.unsqueeze(feature, 0))),
-            axis=2)
+        dist = paddle.sum(
+            paddle.square((paddle.unsqueeze(feature, 1) - paddle.unsqueeze(feature, 0))),
+            axis=2)

         # label to one-hot
         label = paddle.flatten(label)
-        label = paddle.nn.functional.one_hot(label, self.n_class).astype("float32")
+        n_class = logits.shape[1]
+        label = paddle.nn.functional.one_hot(label, n_class).astype("float32")

-        s = (paddle.matmul(label, label, transpose_y=True) == 0).astype("float32")
-        Ld = (1 - s) / 2 * dist + s / 2 * (self.m - dist).clip(min=0)
+        s = (paddle.matmul(
+            label, label, transpose_y=True) == 0).astype("float32")
+        margin = 2 * feature.shape[1]
+        Ld = (1 - s) / 2 * dist + s / 2 * (margin - dist).clip(min=0)
         Ld = Ld.mean()

-        logits = self.fc(feature)
         if self.multi_label:
             # multiple labels classification loss
-            Lc = (logits - label * logits + ((1 + (-logits).exp()).log())).sum(axis=1).mean()
+            Lc = (logits - label * logits + (
+                (1 + (-logits).exp()).log())).sum(axis=1).mean()
         else:
             # single labels classification loss
-            Lc = (-paddle.nn.functional.softmax(logits).log() * label).sum(axis=1).mean()
+            Lc = (-paddle.nn.functional.softmax(logits).log() * label).sum(
+                axis=1).mean()

         return {"dshsdloss": Lc + Ld * self.alpha}


 class LCDSHLoss(nn.Layer):
     """
     # paper [Locality-Constrained Deep Supervised Hashing for Image Retrieval](https://www.ijcai.org/Proceedings/2017/0499.pdf)
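
For reference, a hedged sketch (not part of the diff) of how the refactored DSHSDLoss is driven: with Arch.name: RecModel, the forward output is expected to be a dict carrying both "features" (the Tanh embedding) and "logits" (the FC head output), so the loss no longer needs its own n_class/bit/fc state.

# Hedged sketch with toy tensors: calling the refactored DSHSDLoss.
import paddle
from ppcls.loss.deephashloss import DSHSDLoss

loss_fn = DSHSDLoss(alpha=0.1)
batch = {
    "features": paddle.tanh(paddle.randn([4, 512])),  # bounded embedding from the backbone
    "logits": paddle.randn([4, 185341]),              # FC head output (Head.class_num)
}
labels = paddle.randint(0, 185341, [4])
out = loss_fn(batch, labels)
print(out["dshsdloss"])                               # scalar: Lc + alpha * Ld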