support regnet backbone

pull/198/head
liaoxingyu 2020-07-17 19:13:45 +08:00
parent eb88076714
commit 3b57dea49f
17 changed files with 1030 additions and 24 deletions

View File

@@ -31,6 +31,8 @@ _C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
_C.MODEL.BACKBONE.DEPTH = 50
+# RegNet volume
+_C.MODEL.BACKBONE.VOLUME = "800y"
_C.MODEL.BACKBONE.LAST_STRIDE = 1
# Normalization method for the convolution layers.
_C.MODEL.BACKBONE.NORM = "BN"
@@ -69,7 +71,7 @@ _C.MODEL.HEADS.NECK_FEAT = "before" # options: before, after
_C.MODEL.HEADS.POOL_LAYER = "avgpool"
# Classification layer type
-_C.MODEL.HEADS.CLS_LAYER = "linear" # "arcface" or "circle"
+_C.MODEL.HEADS.CLS_LAYER = "linear" # "arcSoftmax" or "circleSoftmax"
# Margin and Scale for margin-based classification layer
_C.MODEL.HEADS.MARGIN = 0.15
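With the new keys in place, switching a model to the RegNet backbone is purely a config change. A minimal sketch, assuming the usual fastreid get_cfg entry point (training itself elided):

from fastreid.config import get_cfg

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "build_regnet_backbone"  # registered in regnet.py below
cfg.MODEL.BACKBONE.VOLUME = "800y"                 # shorthand for the RegNetY-800MF recipe
cfg.MODEL.BACKBONE.LAST_STRIDE = 1                 # stride-1 final stage, as with ResNet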

View File

@@ -375,7 +375,7 @@ class DefaultTrainer(SimpleTrainer):
Overwrite it if you'd like a different model.
"""
model = build_model(cfg)
-logger = logging.getLogger(__name__)
+# logger = logging.getLogger(__name__)
# logger.info("Model:\n{}".format(model))
return model

View File

@@ -6,6 +6,7 @@ from collections import Mapping, OrderedDict
import numpy as np
from tabulate import tabulate
+from termcolor import colored
def print_csv_format(results):
@@ -33,7 +34,7 @@ def print_csv_format(results):
numalign="left",
)
-logger.info("Evaluation results in csv format: \n" + table)
+logger.info("Evaluation results in csv format: \n" + colored(table, "cyan"))
def verify_results(cfg, results):
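The change only tints the existing evaluation table. A self-contained sketch of the same tabulate + termcolor combination, with made-up numbers purely for illustration:

from tabulate import tabulate
from termcolor import colored

rows = [["Market1501", 85.1, 94.2]]  # hypothetical (dataset, mAP, Rank-1) row
table = tabulate(rows, headers=["Dataset", "mAP", "Rank-1"], numalign="left")
print(colored(table, "cyan"))  # the same cyan highlight print_csv_format now uses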

View File

@@ -5,10 +5,10 @@
"""
from .activation import *
-from .arcface import Arcface
+from .arc_softmax import ArcSoftmax
from .batch_drop import BatchDrop
from .batch_norm import *
-from .circle import Circle
+from .circle_softmax import CircleSoftmax
from .context_block import ContextBlock
from .frn import FRN, TLU
from .non_local import Non_local
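Call sites pick up the renamed layers through the package as before:

from fastreid.layers import ArcSoftmax, CircleSoftmax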

View File

@@ -12,7 +12,7 @@ import torch.nn.functional as F
from torch.nn import Parameter
-class Arcface(nn.Module):
+class ArcSoftmax(nn.Module):
def __init__(self, cfg, in_feat, num_classes):
super().__init__()
self.in_feat = in_feat

View File

@@ -10,7 +10,7 @@ import torch.nn.functional as F
from torch.nn import Parameter
-class Circle(nn.Module):
+class CircleSoftmax(nn.Module):
def __init__(self, cfg, in_feat, num_classes):
super().__init__()
self.in_feat = in_feat

View File

@@ -0,0 +1,26 @@
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 16
W0: 56
WA: 35.73
WM: 2.28
GROUP_W: 16
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_EPOCHS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
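Judging by its parameters (DEPTH 16, W0 56, WA 35.73, WM 2.28, GROUP_W 16, no SE), this looks like the stock pycls RegNetX-800MF recipe; build_regnet_backbone below resolves the VOLUME shorthand to one of these files:

cfg.MODEL.BACKBONE.VOLUME = "800x"  # -> fastreid/modeling/backbones/regnet/RegNetX-800MF_dds_8gpu.yaml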

View File

@@ -0,0 +1,27 @@
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 27
W0: 48
WA: 20.71
WM: 2.65
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_EPOCHS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .

View File

@@ -0,0 +1,28 @@
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 21
W0: 80
WA: 42.63
WM: 2.66
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_EPOCHS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .

View File

@@ -0,0 +1,27 @@
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 14
W0: 56
WA: 38.84
WM: 2.4
GROUP_W: 16
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_EPOCHS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .

View File

@@ -0,0 +1,3 @@
from .regnet import build_regnet_backbone

View File

@@ -0,0 +1,389 @@
import os
from yacs.config import CfgNode as CN
# Global config object
_C = CN()
# Example usage:
#   from fastreid.modeling.backbones.regnet.config import regnet_cfg
regnet_cfg = _C
# ---------------------------------------------------------------------------- #
# Model options
# ---------------------------------------------------------------------------- #
_C.MODEL = CN()
# Model type
_C.MODEL.TYPE = ""
# Number of weight layers
_C.MODEL.DEPTH = 0
# Number of classes
_C.MODEL.NUM_CLASSES = 10
# Loss function (see pycls/models/loss.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"
# ---------------------------------------------------------------------------- #
# ResNet options
# ---------------------------------------------------------------------------- #
_C.RESNET = CN()
# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"
# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1
# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64
# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True
# ---------------------------------------------------------------------------- #
# AnyNet options
# ---------------------------------------------------------------------------- #
_C.ANYNET = CN()
# Stem type
_C.ANYNET.STEM_TYPE = "plain_block"
# Stem width
_C.ANYNET.STEM_W = 32
# Block type
_C.ANYNET.BLOCK_TYPE = "plain_block"
# Normalization method (read as ANYNET.BN_NORM by AnyNet's cfg-driven branch)
_C.ANYNET.BN_NORM = "BN"
# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []
# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []
# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []
# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []
# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False
# SE ratio
_C.ANYNET.SE_R = 0.25
# ---------------------------------------------------------------------------- #
# RegNet options
# ---------------------------------------------------------------------------- #
_C.REGNET = CN()
# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.REGNET.STEM_W = 32
# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"
# Stride of each stage
_C.REGNET.STRIDE = 2
# Squeeze-and-Excitation (RegNetY)
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25
# Depth
_C.REGNET.DEPTH = 10
# Initial width
_C.REGNET.W0 = 32
# Slope
_C.REGNET.WA = 5.0
# Quantization
_C.REGNET.WM = 2.5
# Group width
_C.REGNET.GROUP_W = 16
# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0
# ---------------------------------------------------------------------------- #
# EfficientNet options
# ---------------------------------------------------------------------------- #
_C.EN = CN()
# Stem width
_C.EN.STEM_W = 32
# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []
# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []
# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25
# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []
# Kernel sizes for each stage
_C.EN.KERNELS = []
# Head width
_C.EN.HEAD_W = 1280
# Drop connect ratio
_C.EN.DC_RATIO = 0.0
# Dropout ratio
_C.EN.DROPOUT_RATIO = 0.0
# ---------------------------------------------------------------------------- #
# Batch norm options
# ---------------------------------------------------------------------------- #
_C.BN = CN()
# BN epsilon
_C.BN.EPS = 1e-5
# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1
# Precise BN stats
_C.BN.USE_PRECISE_STATS = False
_C.BN.NUM_SAMPLES_PRECISE = 1024
# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False
# Use a different weight decay for BN layers
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0
# ---------------------------------------------------------------------------- #
# Optimizer options
# ---------------------------------------------------------------------------- #
_C.OPTIM = CN()
# Base learning rate
_C.OPTIM.BASE_LR = 0.1
# Learning rate policy: select from {'cos', 'exp', 'steps'}
_C.OPTIM.LR_POLICY = "cos"
# Exponential decay factor
_C.OPTIM.GAMMA = 0.1
# Steps for 'steps' policy (in epochs)
_C.OPTIM.STEPS = []
# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1
# Maximal number of epochs
_C.OPTIM.MAX_EPOCH = 200
# Momentum
_C.OPTIM.MOMENTUM = 0.9
# Momentum dampening
_C.OPTIM.DAMPENING = 0.0
# Nesterov momentum
_C.OPTIM.NESTEROV = True
# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4
# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1
# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_EPOCHS = 0
# ---------------------------------------------------------------------------- #
# Training options
# ---------------------------------------------------------------------------- #
_C.TRAIN = CN()
# Dataset and split
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"
# Total mini-batch size
_C.TRAIN.BATCH_SIZE = 128
# Image size
_C.TRAIN.IM_SIZE = 224
# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1
# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1
# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True
# Weights to start training from
_C.TRAIN.WEIGHTS = ""
# ---------------------------------------------------------------------------- #
# Testing options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
# Dataset and split
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"
# Total mini-batch size
_C.TEST.BATCH_SIZE = 200
# Image size
_C.TEST.IM_SIZE = 256
# Weights to use for testing
_C.TEST.WEIGHTS = ""
# ---------------------------------------------------------------------------- #
# Common train/test data loader options
# ---------------------------------------------------------------------------- #
_C.DATA_LOADER = CN()
# Number of data loader workers per training process
_C.DATA_LOADER.NUM_WORKERS = 4
# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True
# ---------------------------------------------------------------------------- #
# Memory options
# ---------------------------------------------------------------------------- #
_C.MEM = CN()
# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True
# ---------------------------------------------------------------------------- #
# CUDNN options
# ---------------------------------------------------------------------------- #
_C.CUDNN = CN()
# Perform benchmarking to select the fastest CUDNN algorithms to use
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training)
_C.CUDNN.BENCHMARK = True
# ---------------------------------------------------------------------------- #
# Precise timing options
# ---------------------------------------------------------------------------- #
_C.PREC_TIME = CN()
# Perform precise timing at the start of training
_C.PREC_TIME.ENABLED = False
# Total mini-batch size
_C.PREC_TIME.BATCH_SIZE = 128
# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3
# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1
# Output directory
_C.OUT_DIR = "/tmp"
# Config destination (in OUT_DIR)
_C.CFG_DEST = "config.yaml"
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries
_C.RNG_SEED = 1
# Log destination ('stdout' or 'file')
_C.LOG_DEST = "stdout"
# Log period in iters
_C.LOG_PERIOD = 10
# Distributed backend
_C.DIST_BACKEND = "nccl"
# Hostname and port for initializing multi-process groups
_C.HOST = "localhost"
_C.PORT = 10001
# Models weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"
def assert_and_infer_cfg(cache_urls=True):
"""Checks config values invariants."""
assert (
not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0
), "The first lr step must start at 0"
assert _C.TRAIN.SPLIT in [
"train",
"val",
"test",
], "Train split '{}' not supported".format(_C.TRAIN.SPLIT)
assert (
_C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0
), "Train mini-batch size should be a multiple of NUM_GPUS."
assert _C.TEST.SPLIT in [
"train",
"val",
"test",
], "Test split '{}' not supported".format(_C.TEST.SPLIT)
assert (
_C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0
), "Test mini-batch size should be a multiple of NUM_GPUS."
assert (
not _C.BN.USE_PRECISE_STATS or _C.NUM_GPUS == 1
), "Precise BN stats computation not verified for > 1 GPU"
assert _C.LOG_DEST in [
"stdout",
"file",
], "Log destination '{}' not supported".format(_C.LOG_DEST)
assert (
not _C.PREC_TIME.ENABLED or _C.NUM_GPUS == 1
), "Precise iter time computation not verified for > 1 GPU"
def dump_cfg():
"""Dumps the config to the output directory."""
cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
with open(cfg_file, "w") as f:
_C.dump(stream=f)
def load_cfg(out_dir, cfg_dest="config.yaml"):
"""Loads config from specified output directory."""
cfg_file = os.path.join(out_dir, cfg_dest)
_C.merge_from_file(cfg_file)
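A minimal sketch of driving this module directly, using one of the recipe files added by this commit:

from fastreid.modeling.backbones.regnet.config import regnet_cfg

regnet_cfg.merge_from_file(
    "fastreid/modeling/backbones/regnet/RegNetY-800MF_dds_8gpu.yaml")
print(regnet_cfg.REGNET.DEPTH, regnet_cfg.REGNET.SE_ON)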

View File

@@ -0,0 +1,506 @@
import torch
import os
import logging
import math
import torch.nn as nn
import numpy as np
from fastreid.layers import get_norm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from ..build import BACKBONE_REGISTRY
from .config import regnet_cfg
logger = logging.getLogger(__name__)
def init_weights(m):
"""Performs ResNet-style weight initialization."""
if isinstance(m, nn.Conv2d):
# Note that there is no bias due to BN
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
elif isinstance(m, nn.BatchNorm2d):
zero_init_gamma = (
hasattr(m, "final_bn") and m.final_bn and regnet_cfg.BN.ZERO_INIT_FINAL_GAMMA
)
m.weight.data.fill_(0.0 if zero_init_gamma else 1.0)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(mean=0.0, std=0.01)
m.bias.data.zero_()
def get_stem_fun(stem_type):
"""Retrives the stem function by name."""
stem_funs = {
"res_stem_cifar": ResStemCifar,
"res_stem_in": ResStemIN,
"simple_stem_in": SimpleStemIN,
}
assert stem_type in stem_funs.keys(), "Stem type '{}' not supported".format(
stem_type
)
return stem_funs[stem_type]
def get_block_fun(block_type):
"""Retrieves the block function by name."""
block_funs = {
"vanilla_block": VanillaBlock,
"res_basic_block": ResBasicBlock,
"res_bottleneck_block": ResBottleneckBlock,
}
assert block_type in block_funs.keys(), "Block type '{}' not supported".format(
block_type
)
return block_funs[block_type]
class AnyHead(nn.Module):
"""AnyNet head."""
def __init__(self, w_in, nc):
super(AnyHead, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(w_in, nc, bias=True)
def forward(self, x):
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
class VanillaBlock(nn.Module):
"""Vanilla block: [3x3 conv, BN, Relu] x2"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=None, gw=None, se_r=None):
assert (
bm is None and gw is None and se_r is None
), "Vanilla block does not support bm, gw, and se_r options"
super(VanillaBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def construct(self, w_in, w_out, stride, bn_norm):
# 3x3, BN, ReLU
self.a = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.a_bn = get_norm(bn_norm, w_out)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN, ReLU
self.b = nn.Conv2d(w_out, w_out, kernel_size=3, stride=1, padding=1, bias=False)
self.b_bn = get_norm(bn_norm, w_out)
self.b_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class BasicTransform(nn.Module):
"""Basic transformation: [3x3 conv, BN, Relu] x2"""
def __init__(self, w_in, w_out, stride, bn_norm):
super(BasicTransform, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def construct(self, w_in, w_out, stride, bn_norm):
# 3x3, BN, ReLU
self.a = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.a_bn = get_norm(bn_norm, w_out)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN
self.b = nn.Conv2d(w_out, w_out, kernel_size=3, stride=1, padding=1, bias=False)
self.b_bn = get_norm(bn_norm, w_out)
self.b_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResBasicBlock(nn.Module):
"""Residual basic block: x + F(x), F = basic transform"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=None, gw=None, se_r=None):
assert (
bm is None and gw is None and se_r is None
), "Basic transform does not support bm, gw, and se_r options"
super(ResBasicBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def _add_skip_proj(self, w_in, w_out, stride, bn_norm):
self.proj = nn.Conv2d(
w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, w_out)
def construct(self, w_in, w_out, stride, bn_norm):
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self._add_skip_proj(w_in, w_out, stride, bn_norm)
self.f = BasicTransform(w_in, w_out, stride, bn_norm)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block"""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.construct(w_in, w_se)
def construct(self, w_in, w_se):
# AvgPool
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# FC, Activation, FC, Sigmoid
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, kernel_size=1, bias=True),
nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE),
nn.Conv2d(w_se, w_in, kernel_size=1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
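# Shape walk-through (editor's sketch, not part of the commit): for x of shape
# (N, w_in, H, W), avg_pool yields (N, w_in, 1, 1); f_ex squeezes that to w_se
# channels and back to w_in, and the sigmoid output broadcasts over H and W,
# gating each channel of x independently.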
class BottleneckTransform(nn.Module):
"""Bottlenect transformation: 1x1, 3x3, 1x1"""
def __init__(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
super(BottleneckTransform, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, bm, gw, se_r)
def construct(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
# Compute the bottleneck width
w_b = int(round(w_out * bm))
# Compute the number of groups
num_gs = w_b // gw
# 1x1, BN, ReLU
self.a = nn.Conv2d(w_in, w_b, kernel_size=1, stride=1, padding=0, bias=False)
self.a_bn = get_norm(bn_norm, w_b)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN, ReLU
self.b = nn.Conv2d(
w_b, w_b, kernel_size=3, stride=stride, padding=1, groups=num_gs, bias=False
)
self.b_bn = get_norm(bn_norm, w_b)
self.b_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# Squeeze-and-Excitation (SE)
if se_r:
w_se = int(round(w_in * se_r))
self.se = SE(w_b, w_se)
# 1x1, BN
self.c = nn.Conv2d(w_b, w_out, kernel_size=1, stride=1, padding=0, bias=False)
self.c_bn = get_norm(bn_norm, w_out)
self.c_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResBottleneckBlock(nn.Module):
"""Residual bottleneck block: x + F(x), F = bottleneck transform"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=1.0, gw=1, se_r=None):
super(ResBottleneckBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, bm, gw, se_r)
def _add_skip_proj(self, w_in, w_out, stride, bn_norm):
self.proj = nn.Conv2d(
w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, w_out)
def construct(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self._add_skip_proj(w_in, w_out, stride, bn_norm)
self.f = BottleneckTransform(w_in, w_out, stride, bn_norm, bm, gw, se_r)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
class ResStemCifar(nn.Module):
"""ResNet stem for CIFAR."""
def __init__(self, w_in, w_out, bn_norm):
super(ResStemCifar, self).__init__()
self.construct(w_in, w_out, bn_norm)
def construct(self, w_in, w_out, bn_norm):
# 3x3, BN, ReLU
self.conv = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn = get_norm(bn_norm, w_out, 1)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResStemIN(nn.Module):
"""ResNet stem for ImageNet."""
def __init__(self, w_in, w_out, bn_norm):
super(ResStemIN, self).__init__()
self.construct(w_in, w_out, bn_norm)
def construct(self, w_in, w_out, bn_norm):
# 7x7, BN, ReLU, maxpool
self.conv = nn.Conv2d(
w_in, w_out, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn = get_norm(bn_norm, w_out)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class SimpleStemIN(nn.Module):
"""Simple stem for ImageNet."""
def __init__(self, in_w, out_w, bn_norm):
super(SimpleStemIN, self).__init__()
self.construct(in_w, out_w, bn_norm)
def construct(self, in_w, out_w, bn_norm):
# 3x3, BN, ReLU
self.conv = nn.Conv2d(
in_w, out_w, kernel_size=3, stride=2, padding=1, bias=False
)
self.bn = get_norm(bn_norm, out_w)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class AnyStage(nn.Module):
"""AnyNet stage (sequence of blocks w/ the same output shape)."""
def __init__(self, w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r):
super(AnyStage, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r)
def construct(self, w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r):
# Construct the blocks
for i in range(d):
# Stride and w_in apply to the first block of the stage
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
# Construct the block
self.add_module(
"b{}".format(i + 1), block_fun(b_w_in, w_out, b_stride, bn_norm, bm, gw, se_r)
)
def forward(self, x):
for block in self.children():
x = block(x)
return x
class AnyNet(nn.Module):
"""AnyNet model."""
def __init__(self, **kwargs):
super(AnyNet, self).__init__()
if kwargs:
self.construct(
stem_type=kwargs["stem_type"],
stem_w=kwargs["stem_w"],
block_type=kwargs["block_type"],
ds=kwargs["ds"],
ws=kwargs["ws"],
ss=kwargs["ss"],
bn_norm=kwargs["bn_norm"],
bms=kwargs["bms"],
gws=kwargs["gws"],
se_r=kwargs["se_r"],
)
else:
self.construct(
stem_type=regnet_cfg.ANYNET.STEM_TYPE,
stem_w=regnet_cfg.ANYNET.STEM_W,
block_type=regnet_cfg.ANYNET.BLOCK_TYPE,
ds=regnet_cfg.ANYNET.DEPTHS,
ws=regnet_cfg.ANYNET.WIDTHS,
ss=regnet_cfg.ANYNET.STRIDES,
bn_norm=regnet_cfg.ANYNET.BN_NORM,
bms=regnet_cfg.ANYNET.BOT_MULS,
gws=regnet_cfg.ANYNET.GROUP_WS,
se_r=regnet_cfg.ANYNET.SE_R if regnet_cfg.ANYNET.SE_ON else None,
)
self.apply(init_weights)
def construct(self, stem_type, stem_w, block_type, ds, ws, ss, bn_norm, bms, gws, se_r):
# Generate dummy bot muls and gs for models that do not use them
bms = bms if bms else [1.0 for _d in ds]
gws = gws if gws else [1 for _d in ds]
# Group params by stage
stage_params = list(zip(ds, ws, ss, bms, gws))
# Construct the stem
stem_fun = get_stem_fun(stem_type)
self.stem = stem_fun(3, stem_w, bn_norm)
# Construct the stages
block_fun = get_block_fun(block_type)
prev_w = stem_w
for i, (d, w, s, bm, gw) in enumerate(stage_params):
self.add_module(
"s{}".format(i + 1), AnyStage(prev_w, w, s, bn_norm, d, block_fun, bm, gw, se_r)
)
prev_w = w
# Construct the head
self.in_planes = prev_w
# self.head = AnyHead(w_in=prev_w, nc=nc)
def forward(self, x):
for module in self.children():
x = module(x)
return x
def quantize_float(f, q):
"""Converts a float to closest non-zero int divisible by q."""
return int(round(f / q) * q)
def adjust_ws_gs_comp(ws, bms, gs):
"""Adjusts the compatibility of widths and groups."""
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
return ws, gs
def get_stages_from_blocks(ws, rs):
"""Gets ws/ds of network at each stage from per block values."""
ts_temp = zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
ts = [w != wp or r != rp for w, wp, r, rp in ts_temp]
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
return s_ws, s_ds
def generate_regnet(w_a, w_0, w_m, d, q=8):
"""Generates per block ws from RegNet parameters."""
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
ws_cont = np.arange(d) * w_a + w_0
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
ws = w_0 * np.power(w_m, ks)
ws = np.round(np.divide(ws, q)) * q
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
return ws, num_stages, max_stage, ws_cont
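# Worked example (editor's sketch, not part of the commit): with the
# RegNetY-800MF parameters from the recipe above (w_a=38.84, w_0=56, w_m=2.4,
# d=14), ws_cont is the linear ramp 56, 94.84, 133.68, ...; each entry is
# snapped to w_0 * w_m**k for the nearest integer k, then rounded to a
# multiple of q=8, and runs of equal widths later merge into stages:
#   ws, num_stages, _, _ = generate_regnet(38.84, 56, 2.4, 14)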
class RegNet(AnyNet):
"""RegNet model."""
def __init__(self, last_stride, bn_norm):
# Generate RegNet ws per block
b_ws, num_s, _, _ = generate_regnet(
regnet_cfg.REGNET.WA, regnet_cfg.REGNET.W0, regnet_cfg.REGNET.WM, regnet_cfg.REGNET.DEPTH
)
# Convert to per stage format
ws, ds = get_stages_from_blocks(b_ws, b_ws)
# Generate group widths and bot muls
gws = [regnet_cfg.REGNET.GROUP_W for _ in range(num_s)]
bms = [regnet_cfg.REGNET.BOT_MUL for _ in range(num_s)]
# Adjust the compatibility of ws and gws
ws, gws = adjust_ws_gs_comp(ws, bms, gws)
# Use the same stride for each stage
ss = [regnet_cfg.REGNET.STRIDE for _ in range(num_s)]
ss[-1] = last_stride
# Use SE for RegNetY
se_r = regnet_cfg.REGNET.SE_R if regnet_cfg.REGNET.SE_ON else None
# Construct the model
kwargs = {
"stem_type": regnet_cfg.REGNET.STEM_TYPE,
"stem_w": regnet_cfg.REGNET.STEM_W,
"block_type": regnet_cfg.REGNET.BLOCK_TYPE,
"ss": ss,
"ds": ds,
"ws": ws,
"bn_norm": bn_norm,
"bms": bms,
"gws": gws,
"se_r": se_r,
}
super(RegNet, self).__init__(**kwargs)
@BACKBONE_REGISTRY.register()
def build_regnet_backbone(cfg):
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
volume = cfg.MODEL.BACKBONE.VOLUME
cfg_files = {
'800x': 'fastreid/modeling/backbones/regnet/RegNetX-800MF_dds_8gpu.yaml',
'800y': 'fastreid/modeling/backbones/regnet/RegNetY-800MF_dds_8gpu.yaml',
'1600x': 'fastreid/modeling/backbones/regnet/RegNetX-1600MF_dds_8gpu.yaml',
'1600y': 'fastreid/modeling/backbones/regnet/RegNetY-1600MF_dds_8gpu.yaml',
'3200x': 'fastreid/modeling/backbones/regnet/RegNetX-3200MF_dds_8gpu.yaml',
'3200y': 'fastreid/modeling/backbones/regnet/RegNetY-3200MF_dds_8gpu.yaml',
}[volume]
regnet_cfg.merge_from_file(cfg_files)
model = RegNet(last_stride, bn_norm)
if pretrain:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))['model_state']
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
logger.info(f"Loading pretrained model from {pretrain_path}")
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
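Once registered, the backbone is reachable through the standard factory. A sketch, assuming the default build_backbone plumbing in fastreid.modeling.backbones:

import torch
from fastreid.config import get_cfg
from fastreid.modeling.backbones import build_backbone

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "build_regnet_backbone"
cfg.MODEL.BACKBONE.VOLUME = "800y"
cfg.MODEL.BACKBONE.PRETRAIN = False  # skip loading a pycls checkpoint

backbone = build_backbone(cfg)
feat = backbone(torch.randn(2, 3, 256, 128))  # overall stride 16 with LAST_STRIDE=1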

View File

@@ -249,7 +249,7 @@ def build_resnet_backbone(cfg):
depth = cfg.MODEL.BACKBONE.DEPTH
num_blocks_per_stage = {34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3], }[depth]
-nl_layers_per_stage = {34: [3, 4, 6, 3], 50: [0, 2, 3, 0], 101: [0, 2, 9, 0]}[depth]
+nl_layers_per_stage = {34: [0, 2, 3, 0], 50: [0, 2, 3, 0], 101: [0, 2, 9, 0]}[depth]
block = {34: BasicBlock, 50: Bottleneck, 101: Bottleneck}[depth]
model = ResNet(last_stride, bn_norm, num_splits, with_ibn, with_se, with_nl, block,
num_blocks_per_stage, nl_layers_per_stage)
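Tucked into the same commit, this reads as an unrelated bug fix: with DEPTH 34 the old table reused the per-stage block counts [3, 4, 6, 3] as non-local layer counts, which would presumably wrap every block in a Non_local module; the new values mirror the depth-50 layout.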

View File

@@ -5,7 +5,6 @@
"""
from fastreid.layers import *
from fastreid.modeling.losses import *
from fastreid.utils.weight_init import weights_init_kaiming, weights_init_classifier
from .build import REID_HEADS_REGISTRY
@@ -22,12 +21,12 @@ class BNneckHead(nn.Module):
# identity classification layer
cls_type = cfg.MODEL.HEADS.CLS_LAYER
-if cls_type == 'linear': self.classifier = nn.Linear(in_feat, num_classes, bias=False)
-elif cls_type == 'arcface': self.classifier = Arcface(cfg, in_feat, num_classes)
-elif cls_type == 'circle': self.classifier = Circle(cfg, in_feat, num_classes)
+if cls_type == 'linear': self.classifier = nn.Linear(in_feat, num_classes, bias=False)
+elif cls_type == 'arcSoftmax': self.classifier = ArcSoftmax(cfg, in_feat, num_classes)
+elif cls_type == 'circleSoftmax': self.classifier = CircleSoftmax(cfg, in_feat, num_classes)
else:
raise KeyError(f"{cls_type} is invalid, please choose from "
f"'linear', 'arcface' and 'circle'.")
f"'linear', 'arcSoftmax' and 'circleSoftmax'.")
self.classifier.apply(weights_init_classifier)
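The rename cascades through every head (bnneck, linear, reduction); selecting a margin-based classifier now reads:

cfg.MODEL.HEADS.CLS_LAYER = "circleSoftmax"  # or "arcSoftmax" / "linear"
cfg.MODEL.HEADS.MARGIN = 0.15                # margin, per the defaults shown above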

View File

@@ -5,9 +5,8 @@
"""
from fastreid.layers import *
from fastreid.modeling.losses import *
from .build import REID_HEADS_REGISTRY
from fastreid.utils.weight_init import weights_init_classifier
from .build import REID_HEADS_REGISTRY
@REID_HEADS_REGISTRY.register()
@@ -18,12 +17,12 @@ class LinearHead(nn.Module):
# identity classification layer
cls_type = cfg.MODEL.HEADS.CLS_LAYER
-if cls_type == 'linear': self.classifier = nn.Linear(in_feat, num_classes, bias=False)
-elif cls_type == 'arcface': self.classifier = Arcface(cfg, in_feat, num_classes)
-elif cls_type == 'circle': self.classifier = Circle(cfg, in_feat, num_classes)
+if cls_type == 'linear': self.classifier = nn.Linear(in_feat, num_classes, bias=False)
+elif cls_type == 'arcSoftmax': self.classifier = ArcSoftmax(cfg, in_feat, num_classes)
+elif cls_type == 'circleSoftmax': self.classifier = CircleSoftmax(cfg, in_feat, num_classes)
else:
raise KeyError(f"{cls_type} is invalid, please choose from "
f"'linear', 'arcface' and 'circle'.")
f"'linear', 'arcSoftmax' and 'circleSoftmax'.")
self.classifier.apply(weights_init_classifier)

View File

@@ -5,7 +5,6 @@
"""
from fastreid.layers import *
from fastreid.modeling.losses import *
from fastreid.utils.weight_init import weights_init_kaiming, weights_init_classifier
from .build import REID_HEADS_REGISTRY
@@ -33,12 +32,12 @@ class ReductionHead(nn.Module):
# identity classification layer
cls_type = cfg.MODEL.HEADS.CLS_LAYER
-if cls_type == 'linear': self.classifier = nn.Linear(reduction_dim, num_classes, bias=False)
-elif cls_type == 'arcface': self.classifier = Arcface(cfg, reduction_dim, num_classes)
-elif cls_type == 'circle': self.classifier = Circle(cfg, reduction_dim, num_classes)
+if cls_type == 'linear': self.classifier = nn.Linear(reduction_dim, num_classes, bias=False)
+elif cls_type == 'arcSoftmax': self.classifier = ArcSoftmax(cfg, reduction_dim, num_classes)
+elif cls_type == 'circleSoftmax': self.classifier = CircleSoftmax(cfg, reduction_dim, num_classes)
else:
raise KeyError(f"{cls_type} is invalid, please choose from "
f"'linear', 'arcface' and 'circle'.")
f"'linear', 'arcSoftmax' and 'circleSoftmax'.")
self.classifier.apply(weights_init_classifier)