# Copyright (c) Alibaba, Inc. and its affiliates.
from functools import reduce

import torch
import torch.nn as nn
from packaging import version

from easycv.models.utils import GeMPooling, ResLayer
from ..backbones.hrnet import Bottleneck
from ..registry import NECKS
from ..utils import ConvModule, _init_weights, build_norm_layer


@NECKS.register_module
class LinearNeck(nn.Module):
    '''Linear neck: fc only.
    '''

    def __init__(self,
                 in_channels,
                 out_channels,
                 with_avg_pool=True,
                 with_norm=False):
        super(LinearNeck, self).__init__()
        self.with_avg_pool = with_avg_pool
        if with_avg_pool:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(in_channels, out_channels)
        self.with_norm = with_norm

    def init_weights(self, init_linear='normal'):
        _init_weights(self, init_linear)

    def forward(self, x):
        assert len(x) == 1 or len(x) == 2  # to fit vit model
        x = x[0]
        if self.with_avg_pool:
            x = self.avgpool(x)
        x = self.fc(x.view(x.size(0), -1))
        if self.with_norm:
            x = nn.functional.normalize(x, p=2, dim=1)
        return [x]


@NECKS.register_module
class RetrivalNeck(nn.Module):
    '''RetrivalNeck: refer to "Combination of Multiple Global Descriptors for
    Image Retrieval" (https://arxiv.org/pdf/1903.10663.pdf).

    CGD feature: combination of GeM pooling, max pooling and avg pooling,
        each branch computed as pool -> fc -> norm, then concat -> norm.
    Avg feature: avg pool -> bn -> fc (BNNeck-style classification branch).

    If len(cdg_config) > 0: return [CGD, Avg]
    If len(cdg_config) == 0: return [Avg]
    '''

    def __init__(self,
                 in_channels,
                 out_channels,
                 with_avg_pool=True,
                 cdg_config=['G', 'M']):
        """ Init RetrivalNeck. This neck does not support dynamic input sizes.

        Args:
            in_channels: Int - input feature map channels
            out_channels: Int - output feature map channels
            with_avg_pool: bool, do avg pool for the BNNeck branch or not
            cdg_config: list chosen from ('G', 'M', 'S') to configure the CGD
                feature, CGD = [GeM pooling] + [max pooling] + [avg pooling];
                if len(cdg_config) > 0: return [CGD, Avg]
                if len(cdg_config) == 0: return [Avg]
        """
        super(RetrivalNeck, self).__init__()
        self.with_avg_pool = with_avg_pool

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(in_channels, out_channels, bias=False)
        self.dropout = nn.Dropout(p=0.3)
        _, self.bn_output = build_norm_layer(dict(type='BN'), in_channels)

        self.cdg_config = cdg_config
        cgd_length = len(cdg_config)

        if cgd_length > 0:
            assert out_channels % cgd_length == 0

            if 'M' in cdg_config:
                self.mpool = nn.AdaptiveMaxPool2d((1, 1))
                self.fc_mx = nn.Linear(
                    in_channels, int(out_channels / cgd_length), bias=False)

            if 'S' in cdg_config:
                self.spool = nn.AdaptiveAvgPool2d((1, 1))
                self.fc_sx = nn.Linear(
                    in_channels, int(out_channels / cgd_length), bias=False)

            if 'G' in cdg_config:
                self.gpool = GeMPooling()
                self.fc_gx = nn.Linear(
                    in_channels, int(out_channels / cgd_length), bias=False)

    def init_weights(self, init_linear='normal'):
        _init_weights(self, init_linear)

    def forward(self, x):
        assert len(x) == 1 or len(x) == 2  # to fit vit model
        x = x[0]

        # BNNeck branch with avg pool
        if self.with_avg_pool:
            ax = self.avgpool(x)
        else:
            ax = x
        cls_x = self.bn_output(ax)
        cls_x = self.fc(cls_x.view(x.size(0), -1))
        cls_x = self.dropout(cls_x)

        if len(self.cdg_config) > 0:
            concat_list = []
            if 'S' in self.cdg_config:
                sx = self.spool(x).view(x.size(0), -1)
                sx = self.fc_sx(sx)
                sx = nn.functional.normalize(sx, p=2, dim=1)
                concat_list.append(sx)

            if 'G' in self.cdg_config:
                gx = self.gpool(x).view(x.size(0), -1)
                gx = self.fc_gx(gx)
                gx = nn.functional.normalize(gx, p=2, dim=1)
                concat_list.append(gx)

            if 'M' in self.cdg_config:
                mx = self.mpool(x).view(x.size(0), -1)
                mx = self.fc_mx(mx)
                mx = nn.functional.normalize(mx, p=2, dim=1)
                concat_list.append(mx)

            concatx = torch.cat(concat_list, dim=1)
            concatx = concatx.view(concatx.size(0), -1)

            return [concatx, cls_x]
        else:
            return [cls_x]


@NECKS.register_module
class FaceIDNeck(nn.Module):
    '''FaceID neck: BN, dropout, flatten, linear, BN.
    '''

    def __init__(self,
                 in_channels,
                 out_channels,
                 map_shape=1,
                 dropout_ratio=0.4,
                 with_norm=False,
                 bn_type='SyncBN'):
        """ Init FaceIDNeck. The FaceID neck does not pool the input feature
        map and does not support dynamic input sizes.

        Args:
            in_channels: Int - input feature map channels
            out_channels: Int - output feature map channels
            map_shape: Int or list(int, ...), input feature map (w, h),
                or w when w == h
            dropout_ratio: float, dropout ratio
            with_norm: normalize output feature or not
            bn_type: 'SyncBN' or 'BN'
        """
        super(FaceIDNeck, self).__init__()

        # torch < 1.4.0 requires 4D input for SyncBN, so 2D features are
        # expanded before the norm and squeezed back afterwards.
        if version.parse(torch.__version__) < version.parse('1.4.0'):
            self.expand_for_syncbn = True
        else:
            self.expand_for_syncbn = False

        _, self.bn_input = build_norm_layer(dict(type=bn_type), in_channels)
        self.dropout = nn.Dropout(p=dropout_ratio)

        if type(map_shape) == list:
            in_ = int(reduce(lambda x, y: x * y, map_shape) * in_channels)
        else:
            assert type(map_shape) == int
            in_ = in_channels * map_shape * map_shape

        self.fc = nn.Linear(in_, out_channels)
        self.with_norm = with_norm
        self.syncbn = bn_type == 'SyncBN'

        if self.syncbn:
            _, self.bn_output = build_norm_layer(
                dict(type=bn_type), out_channels)
        else:
            self.bn_output = nn.BatchNorm1d(out_channels)

    def _forward_syncbn(self, module, x):
        assert x.dim() == 2
        if self.expand_for_syncbn:
            x = module(x.unsqueeze(-1).unsqueeze(-1)).squeeze(-1).squeeze(-1)
        else:
            x = module(x)
        return x

    def init_weights(self, init_linear='normal'):
        _init_weights(self, init_linear)

    def forward(self, x):
        assert len(x) == 1 or len(x) == 2  # to fit vit model
        x = x[0]
        x = self.bn_input(x)
        x = self.dropout(x)
        x = self.fc(x.view(x.size(0), -1))
        x = self._forward_syncbn(self.bn_output, x)
        if self.with_norm:
            x = nn.functional.normalize(x, p=2, dim=1)
        return [x]


@NECKS.register_module
class MultiLinearNeck(nn.Module):
    '''MultiLinearNeck: multi-fc head.
    '''

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_layers=1,
                 with_avg_pool=True):
        """
        Args:
            in_channels: int or list[int]
            out_channels: int or list[int]
            num_layers: total number of fc layers
            with_avg_pool: input will be avg-pooled if True

        Returns:
            None

        Raises:
            AssertionError: if len(in_channels) != len(out_channels)
                or len(in_channels) != num_layers
        """
        super(MultiLinearNeck, self).__init__()
        self.with_avg_pool = with_avg_pool
        self.num_layers = num_layers
        if with_avg_pool:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        if num_layers == 1:
            self.fc = nn.Linear(in_channels, out_channels)
        else:
            assert len(in_channels) == len(out_channels)
            assert len(in_channels) == num_layers
            self.fc = nn.ModuleList(
                [nn.Linear(i, j) for i, j in zip(in_channels, out_channels)])

    def init_weights(self, init_linear='normal'):
        _init_weights(self, init_linear)

    def forward(self, x):
        assert len(x) == 1 or len(x) == 2  # to fit vit model
        x = x[0]
        if self.with_avg_pool:
            x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        if self.num_layers == 1:
            x = self.fc(x)
        else:
            # self.fc is an nn.ModuleList in the multi-layer case and cannot
            # be called directly; apply the fc layers sequentially.
            for fc in self.fc:
                x = fc(x)
        return [x]


@NECKS.register_module()
class HRFuseScales(nn.Module):
    """Fuse feature maps of multiple scales in HRNet.

    Args:
        in_channels (list[int]): The input channels of all scales.
        out_channels (int): The channels of the fused feature map.
            Defaults to 2048.
        norm_cfg (dict): dictionary to construct norm layers.
            Defaults to ``dict(type='BN', momentum=0.1)``.
    """

    def __init__(self,
                 in_channels,
                 out_channels=2048,
                 norm_cfg=dict(type='BN', momentum=0.1)):
        super(HRFuseScales, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.norm_cfg = norm_cfg

        block_type = Bottleneck
        out_channels = [128, 256, 512, 1024]

        # Increase the channels on each resolution
        # from C, 2C, 4C, 8C to 128, 256, 512, 1024.
        increase_layers = []
        for i in range(len(in_channels)):
            increase_layers.append(
                ResLayer(
                    block_type,
                    in_channels=in_channels[i],
                    out_channels=out_channels[i],
                    num_blocks=1,
                    stride=1,
                ))
        self.increase_layers = nn.ModuleList(increase_layers)

        # Downsample feature maps in each scale.
        downsample_layers = []
        for i in range(len(in_channels) - 1):
            downsample_layers.append(
                ConvModule(
                    in_channels=out_channels[i],
                    out_channels=out_channels[i + 1],
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    norm_cfg=self.norm_cfg,
                    bias=False,
                ))
        self.downsample_layers = nn.ModuleList(downsample_layers)

        # The final conv block before the classifier linear layer.
        self.final_layer = ConvModule(
            in_channels=out_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            norm_cfg=self.norm_cfg,
            bias=False,
        )

    def init_weights(self, init_linear='normal'):
        _init_weights(self, init_linear)

    def forward(self, x):
        assert len(x) == len(self.in_channels)

        feat = self.increase_layers[0](x[0])
        for i in range(len(self.downsample_layers)):
            feat = self.downsample_layers[i](feat) + \
                self.increase_layers[i + 1](x[i + 1])

        return [self.final_layer(feat)]
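

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library API): every neck above
# takes a list/tuple of backbone feature maps and returns a list of flattened
# embeddings. The batch size, channel counts and spatial sizes below are
# arbitrary example values, not defaults of the library.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    feat = torch.randn(4, 2048, 7, 7)  # (N, C, H, W) backbone feature map

    # LinearNeck: global average pool -> fc, returns a single embedding.
    linear_neck = LinearNeck(in_channels=2048, out_channels=256)
    linear_neck.init_weights()
    out = linear_neck([feat])
    assert out[0].shape == (4, 256)

    # RetrivalNeck with GeM + max pooling branches: returns [CGD, Avg],
    # where each CGD branch contributes out_channels / len(cdg_config) dims.
    retrival_neck = RetrivalNeck(
        in_channels=2048, out_channels=512, cdg_config=['G', 'M'])
    retrival_neck.init_weights()
    cgd, avg = retrival_neck([feat])
    assert cgd.shape == (4, 512) and avg.shape == (4, 512)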