update fastreid V1.0

pull/389/head
liaoxingyu 2021-01-18 11:36:38 +08:00
parent 15213dde4b
commit 15e1729a27
177 changed files with 3828 additions and 1667 deletions

View File

@ -1,11 +1,11 @@
_BASE_: "Base-bagtricks.yml"
_BASE_: Base-bagtricks.yml
MODEL:
BACKBONE:
WITH_NL: True
HEADS:
POOL_LAYER: "gempool"
POOL_LAYER: gempool
LOSSES:
NAME: ("CrossEntropyLoss", "TripletLoss")

View File

@ -1,25 +1,12 @@
_BASE_: "Base-SBS.yml"
_BASE_: Base-SBS.yml
MODEL:
META_ARCHITECTURE: 'MGN'
META_ARCHITECTURE: MGN
FREEZE_LAYERS: ["backbone", "b1", "b2", "b3",]
FREEZE_LAYERS: [backbone, b1, b2, b3,]
BACKBONE:
WITH_NL: False
HEADS:
EMBEDDING_DIM: 256
LOSSES:
NAME: ("CrossEntropyLoss", "TripletLoss",)
CE:
EPSILON: 0.1
SCALE: 1.0
TRI:
MARGIN: 0.0
HARD_MINING: True
NORM_FEAT: False
SCALE: 1.0

View File

@ -1,15 +1,15 @@
_BASE_: "Base-bagtricks.yml"
_BASE_: Base-bagtricks.yml
MODEL:
FREEZE_LAYERS: ["backbone"]
FREEZE_LAYERS: [ backbone ]
BACKBONE:
WITH_NL: True
HEADS:
NECK_FEAT: "after"
POOL_LAYER: "gempoolP"
CLS_LAYER: "circleSoftmax"
NECK_FEAT: after
POOL_LAYER: gempoolP
CLS_LAYER: circleSoftmax
SCALE: 64
MARGIN: 0.35
@ -26,8 +26,8 @@ MODEL:
SCALE: 1.0
INPUT:
SIZE_TRAIN: [384, 128]
SIZE_TEST: [384, 128]
SIZE_TRAIN: [ 384, 128 ]
SIZE_TEST: [ 384, 128 ]
DO_AUTOAUG: True
AUTOAUG_PROB: 0.1
@ -36,7 +36,8 @@ DATALOADER:
NUM_INSTANCE: 16
SOLVER:
OPT: "Adam"
FP16_ENABLED: False
OPT: Adam
MAX_EPOCH: 60
BASE_LR: 0.00035
BIAS_LR_FACTOR: 1.
@ -44,19 +45,19 @@ SOLVER:
WEIGHT_DECAY_BIAS: 0.0005
IMS_PER_BATCH: 64
SCHED: "CosineAnnealingLR"
SCHED: CosineAnnealingLR
DELAY_EPOCHS: 30
ETA_MIN_LR: 0.00000077
ETA_MIN_LR: 0.0000007
WARMUP_FACTOR: 0.1
WARMUP_ITERS: 2000
WARMUP_EPOCHS: 10
FREEZE_ITERS: 2000
FREEZE_ITERS: 1000
CHECKPOINT_PERIOD: 20
TEST:
EVAL_PERIOD: 20
EVAL_PERIOD: 10
IMS_PER_BATCH: 128
CUDNN_BENCHMARK: True

View File

@ -1,23 +1,22 @@
MODEL:
META_ARCHITECTURE: "Baseline"
META_ARCHITECTURE: Baseline
BACKBONE:
NAME: "build_resnet_backbone"
NORM: "BN"
DEPTH: "50x"
NAME: build_resnet_backbone
NORM: BN
DEPTH: 50x
LAST_STRIDE: 1
FEAT_DIM: 2048
WITH_IBN: False
PRETRAIN: True
PRETRAIN_PATH: "/export/home/lxy/.cache/torch/checkpoints/resnet50-19c8e357.pth"
HEADS:
NAME: "EmbeddingHead"
NORM: "BN"
NAME: EmbeddingHead
NORM: BN
WITH_BNNECK: True
POOL_LAYER: "avgpool"
NECK_FEAT: "before"
CLS_LAYER: "linear"
POOL_LAYER: avgpool
NECK_FEAT: before
CLS_LAYER: linear
LOSSES:
NAME: ("CrossEntropyLoss", "TripletLoss",)
@ -33,8 +32,8 @@ MODEL:
SCALE: 1.
INPUT:
SIZE_TRAIN: [256, 128]
SIZE_TEST: [256, 128]
SIZE_TRAIN: [ 256, 128 ]
SIZE_TEST: [ 256, 128 ]
REA:
ENABLED: True
PROB: 0.5
@ -48,7 +47,7 @@ DATALOADER:
SOLVER:
FP16_ENABLED: True
OPT: "Adam"
OPT: Adam
MAX_EPOCH: 120
BASE_LR: 0.00035
BIAS_LR_FACTOR: 2.
@ -56,12 +55,12 @@ SOLVER:
WEIGHT_DECAY_BIAS: 0.0005
IMS_PER_BATCH: 64
SCHED: "MultiStepLR"
STEPS: [40, 90]
SCHED: MultiStepLR
STEPS: [ 40, 90 ]
GAMMA: 0.1
WARMUP_FACTOR: 0.1
WARMUP_ITERS: 2000
WARMUP_EPOCHS: 10
CHECKPOINT_PERIOD: 30

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/agw_R101-ibn"
OUTPUT_DIR: logs/dukemtmc/agw_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/agw_R50-ibn"
OUTPUT_DIR: logs/dukemtmc/agw_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/agw_R50"
OUTPUT_DIR: logs/dukemtmc/agw_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/agw_S50"
OUTPUT_DIR: logs/dukemtmc/agw_S50

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/bagtricks_R101-ibn"
OUTPUT_DIR: logs/dukemtmc/bagtricks_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/bagtricks_R50-ibn"
OUTPUT_DIR: logs/dukemtmc/bagtricks_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/bagtricks_R50"
OUTPUT_DIR: logs/dukemtmc/bagtricks_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/bagtricks_S50"
OUTPUT_DIR: logs/dukemtmc/bagtricks_S50

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-MGN.yml"
_BASE_: ../Base-MGN.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/mgn_R50-ibn"
OUTPUT_DIR: logs/dukemtmc/mgn_R50-ibn

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/sbs_R101-ibn"
OUTPUT_DIR: logs/dukemtmc/sbs_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/sbs_R50-ibn"
OUTPUT_DIR: logs/dukemtmc/sbs_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/sbs_R50"
OUTPUT_DIR: logs/dukemtmc/sbs_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "logs/dukemtmc/sbs_S50"
OUTPUT_DIR: logs/dukemtmc/sbs_S50

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/agw_R101-ibn"
OUTPUT_DIR: logs/msmt17/agw_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/agw_R50-ibn"
OUTPUT_DIR: logs/msmt17/agw_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/agw_R50"
OUTPUT_DIR: logs/msmt17/agw_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/agw_S50"
OUTPUT_DIR: logs/msmt17/agw_S50

View File

@ -1,13 +1,13 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/bagtricks_R101-ibn"
OUTPUT_DIR: logs/msmt17/bagtricks_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
@ -8,5 +8,5 @@ DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/bagtricks_R50-ibn"
OUTPUT_DIR: logs/msmt17/bagtricks_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/bagtricks_R50"
OUTPUT_DIR: logs/msmt17/bagtricks_R50

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/bagtricks_S50"
OUTPUT_DIR: logs/msmt17/bagtricks_S50

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-MGN.yml"
_BASE_: ../Base-MGN.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/mgn_R50-ibn"
OUTPUT_DIR: logs/msmt17/mgn_R50-ibn

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/sbs_R101-ibn"
OUTPUT_DIR: logs/msmt17/sbs_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/sbs_R50-ibn"
OUTPUT_DIR: logs/msmt17/sbs_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/sbs_R50"
OUTPUT_DIR: logs/msmt17/sbs_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("MSMT17",)
TESTS: ("MSMT17",)
OUTPUT_DIR: "logs/msmt17/sbs_S50"
OUTPUT_DIR: logs/msmt17/sbs_S50

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/agw_R101-ibn"
OUTPUT_DIR: logs/market1501/agw_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/agw_R50-ibn"
OUTPUT_DIR: logs/market1501/agw_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/agw_R50"
OUTPUT_DIR: logs/market1501/agw_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-AGW.yml"
_BASE_: ../Base-AGW.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/agw_S50"
OUTPUT_DIR: logs/market1501/agw_S50

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/bagtricks_R101-ibn"
OUTPUT_DIR: logs/market1501/bagtricks_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/bagtricks_R50-ibn"
OUTPUT_DIR: logs/market1501/bagtricks_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/bagtricks_R50"
OUTPUT_DIR: logs/market1501/bagtricks_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/bagtricks_S50"
OUTPUT_DIR: logs/market1501/bagtricks_S50

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-MGN.yml"
_BASE_: ../Base-MGN.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/mgn_R50-ibn"
OUTPUT_DIR: logs/market1501/mgn_R50-ibn

View File

@ -1,12 +1,12 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
DEPTH: "101x"
DEPTH: 101x
WITH_IBN: True
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/sbs_R101-ibn"
OUTPUT_DIR: logs/market1501/sbs_R101-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
@ -8,4 +8,4 @@ DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/sbs_R50-ibn"
OUTPUT_DIR: logs/market1501/sbs_R50-ibn

View File

@ -1,7 +1,7 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/sbs_R50"
OUTPUT_DIR: logs/market1501/sbs_R50

View File

@ -1,11 +1,11 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
MODEL:
BACKBONE:
NAME: "build_resnest_backbone"
NAME: build_resnest_backbone
DATASETS:
NAMES: ("Market1501",)
TESTS: ("Market1501",)
OUTPUT_DIR: "logs/market1501/sbs_S50"
OUTPUT_DIR: logs/market1501/sbs_S50

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
INPUT:
SIZE_TRAIN: [256, 256]
@ -22,7 +22,7 @@ SOLVER:
IMS_PER_BATCH: 128
MAX_ITER: 60
STEPS: [30, 50]
WARMUP_ITERS: 10
WARMUP_EPOCHS: 10
CHECKPOINT_PERIOD: 20
@ -30,4 +30,4 @@ TEST:
EVAL_PERIOD: 20
IMS_PER_BATCH: 128
OUTPUT_DIR: "logs/veriwild/bagtricks_R50-ibn_4gpu"
OUTPUT_DIR: logs/veriwild/bagtricks_R50-ibn_4gpu

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-SBS.yml"
_BASE_: ../Base-SBS.yml
INPUT:
SIZE_TRAIN: [256, 256]
@ -9,14 +9,14 @@ MODEL:
WITH_IBN: True
SOLVER:
OPT: "SGD"
OPT: SGD
BASE_LR: 0.01
ETA_MIN_LR: 7.7e-5
IMS_PER_BATCH: 64
MAX_ITER: 60
DELAY_ITERS: 30
WARMUP_ITERS: 10
WARMUP_EPOCHS: 10
FREEZE_ITERS: 10
CHECKPOINT_PERIOD: 20
@ -29,4 +29,4 @@ TEST:
EVAL_PERIOD: 20
IMS_PER_BATCH: 128
OUTPUT_DIR: "logs/veri/sbs_R50-ibn"
OUTPUT_DIR: logs/veri/sbs_R50-ibn

View File

@ -1,4 +1,4 @@
_BASE_: "../Base-bagtricks.yml"
_BASE_: ../Base-bagtricks.yml
INPUT:
SIZE_TRAIN: [256, 256]
@ -24,7 +24,7 @@ SOLVER:
IMS_PER_BATCH: 512
MAX_ITER: 60
STEPS: [30, 50]
WARMUP_ITERS: 10
WARMUP_EPOCHS: 10
CHECKPOINT_PERIOD: 20
@ -32,4 +32,4 @@ TEST:
EVAL_PERIOD: 20
IMS_PER_BATCH: 128
OUTPUT_DIR: "logs/vehicleid/bagtricks_R50-ibn_4gpu"
OUTPUT_DIR: logs/vehicleid/bagtricks_R50-ibn_4gpu

View File

@ -25,6 +25,9 @@ _C.MODEL.META_ARCHITECTURE = "Baseline"
_C.MODEL.FREEZE_LAYERS = ['']
# MoCo memory size
_C.MODEL.QUEUE_SIZE = 8192
# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
@ -120,6 +123,13 @@ _C.MODEL.PIXEL_MEAN = [0.485*255, 0.456*255, 0.406*255]
# Values to be used for image normalization
_C.MODEL.PIXEL_STD = [0.229*255, 0.224*255, 0.225*255]
# -----------------------------------------------------------------------------
# KNOWLEDGE DISTILLATION
# -----------------------------------------------------------------------------
_C.KD = CN()
_C.KD.MODEL_CONFIG = ""
_C.KD.MODEL_WEIGHTS = ""
# -----------------------------------------------------------------------------
# INPUT
@ -148,6 +158,9 @@ _C.INPUT.CJ.CONTRAST = 0.15
_C.INPUT.CJ.SATURATION = 0.1
_C.INPUT.CJ.HUE = 0.1
# Random Affine
_C.INPUT.DO_AFFINE = False
# Auto augmentation
_C.INPUT.DO_AUTOAUG = False
_C.INPUT.AUTOAUG_PROB = 0.0
@ -160,7 +173,7 @@ _C.INPUT.AUGMIX_PROB = 0.0
_C.INPUT.REA = CN()
_C.INPUT.REA.ENABLED = False
_C.INPUT.REA.PROB = 0.5
_C.INPUT.REA.VALUE = [0.596*255, 0.558*255, 0.497*255]
_C.INPUT.REA.VALUE = [0.485*255, 0.456*255, 0.406*255]
# Random Patch
_C.INPUT.RPT = CN()
_C.INPUT.RPT.ENABLED = False
@ -207,6 +220,7 @@ _C.SOLVER.BIAS_LR_FACTOR = 1.
_C.SOLVER.HEADS_LR_FACTOR = 1.
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.NESTEROV = True
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0.
@ -224,7 +238,7 @@ _C.SOLVER.ETA_MIN_LR = 1e-7
# Warmup options
_C.SOLVER.WARMUP_FACTOR = 0.1
_C.SOLVER.WARMUP_ITERS = 10
_C.SOLVER.WARMUP_EPOCHS = 10
_C.SOLVER.WARMUP_METHOD = "linear"
# Backbone freeze iters
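The defaults added here (the MoCo `QUEUE_SIZE`, the `KD` node, `INPUT.DO_AFFINE`, `SOLVER.NESTEROV`, and the iteration-to-epoch rename of warmup) can be overridden like any other option. A minimal sketch of doing so from Python; the teacher paths are placeholders:

```python
from fastreid.config import get_cfg

cfg = get_cfg()

# Knowledge-distillation node introduced in this commit (paths are placeholders)
cfg.KD.MODEL_CONFIG = "configs/Market1501/bagtricks_R101-ibn.yml"
cfg.KD.MODEL_WEIGHTS = "logs/market1501/bagtricks_R101-ibn/model_best.pth"

# Other options added or renamed in defaults.py
cfg.MODEL.QUEUE_SIZE = 8192        # MoCo memory size
cfg.INPUT.DO_AFFINE = True         # enable the new random-affine augmentation
cfg.SOLVER.WARMUP_EPOCHS = 10      # replaces the old SOLVER.WARMUP_ITERS

print(cfg.KD)
```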

View File

@ -59,7 +59,7 @@ def build_reid_train_loader(cfg, mapper=None, **kwargs):
return train_loader
def build_reid_test_loader(cfg, dataset_name, **kwargs):
def build_reid_test_loader(cfg, dataset_name, mapper=None, **kwargs):
cfg = cfg.clone()
dataset = DATASET_REGISTRY.get(dataset_name)(root=_root, **kwargs)
@ -67,8 +67,12 @@ def build_reid_test_loader(cfg, dataset_name, **kwargs):
dataset.show_test()
test_items = dataset.query + dataset.gallery
test_transforms = build_transforms(cfg, is_train=False)
test_set = CommDataset(test_items, test_transforms, relabel=False)
if mapper is not None:
transforms = mapper
else:
transforms = build_transforms(cfg, is_train=False)
test_set = CommDataset(test_items, transforms, relabel=False)
mini_batch_size = cfg.TEST.IMS_PER_BATCH // comm.get_world_size()
data_sampler = samplers.InferenceSampler(len(test_set))
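The new `mapper` argument lets a caller supply its own test-time transforms instead of the ones built from the config. A minimal sketch, assuming torchvision is installed and the Market1501 dataset is available under the configured dataset root:

```python
import torchvision.transforms as T

from fastreid.config import get_cfg
from fastreid.data import build_reid_test_loader

cfg = get_cfg()

# Default behaviour: transforms come from build_transforms(cfg, is_train=False)
default_loader = build_reid_test_loader(cfg, "Market1501")

# Custom behaviour: any callable that maps a PIL image to a tensor can be passed
mapper = T.Compose([T.Resize((256, 128)), T.ToTensor()])
custom_loader = build_reid_test_loader(cfg, "Market1501", mapper=mapper)
```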

View File

@ -4,7 +4,6 @@
@contact: sherlockliao01@gmail.com
"""
from .autoaugment import *
from .build import build_transforms
from .transforms import *
from .autoaugment import *

View File

@ -41,6 +41,9 @@ def build_transforms(cfg, is_train=True):
cj_saturation = cfg.INPUT.CJ.SATURATION
cj_hue = cfg.INPUT.CJ.HUE
# random affine
do_affine = cfg.INPUT.DO_AFFINE
# random erasing
do_rea = cfg.INPUT.REA.ENABLED
rea_prob = cfg.INPUT.REA.PROB
@ -60,9 +63,11 @@ def build_transforms(cfg, is_train=True):
res.extend([T.Pad(padding, padding_mode=padding_mode), T.RandomCrop(size_train)])
if do_cj:
res.append(T.RandomApply([T.ColorJitter(cj_brightness, cj_contrast, cj_saturation, cj_hue)], p=cj_prob))
if do_affine:
res.append(T.RandomAffine(degrees=0, translate=None, scale=[0.9, 1.1], shear=None, resample=False,
fillcolor=128))
if do_augmix:
res.append(T.RandomApply([AugMix()], p=augmix_prob))
res.append(AugMix(prob=augmix_prob))
res.append(ToTensor())
if do_rea:
res.append(T.RandomErasing(p=rea_prob, value=rea_value))

View File

@ -114,38 +114,38 @@ def solarize(pil_img, level, *args):
return ImageOps.solarize(pil_img, 256 - level)
def shear_x(pil_img, level, image_size):
def shear_x(pil_img, level):
level = float_parameter(sample_level(level), 0.3)
if np.random.uniform() > 0.5:
level = -level
return pil_img.transform(image_size,
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, level, 0, 0, 1, 0),
resample=Image.BILINEAR)
def shear_y(pil_img, level, image_size):
def shear_y(pil_img, level):
level = float_parameter(sample_level(level), 0.3)
if np.random.uniform() > 0.5:
level = -level
return pil_img.transform(image_size,
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, 0, 0, level, 1, 0),
resample=Image.BILINEAR)
def translate_x(pil_img, level, image_size):
level = int_parameter(sample_level(level), image_size[0] / 3)
def translate_x(pil_img, level):
level = int_parameter(sample_level(level), pil_img.size[0] / 3)
if np.random.random() > 0.5:
level = -level
return pil_img.transform(image_size,
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, 0, level, 0, 1, 0),
resample=Image.BILINEAR)
def translate_y(pil_img, level, image_size):
level = int_parameter(sample_level(level), image_size[1] / 3)
def translate_y(pil_img, level):
level = int_parameter(sample_level(level), pil_img.size[1] / 3)
if np.random.random() > 0.5:
level = -level
return pil_img.transform(image_size,
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, 0, 0, 0, 1, level),
resample=Image.BILINEAR)
@ -174,17 +174,7 @@ def sharpness(pil_img, level, *args):
return ImageEnhance.Sharpness(pil_img).enhance(level)
augmentations_reid = [
autocontrast, equalize, posterize, shear_x, shear_y,
color, contrast, brightness, sharpness
]
augmentations = [
autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
translate_x, translate_y
]
augmentations_all = [
autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
translate_x, translate_y, color, contrast, brightness, sharpness
]

View File

@ -13,7 +13,7 @@ from collections import deque
import numpy as np
from PIL import Image
from .functional import to_tensor, augmentations_reid
from .functional import to_tensor, augmentations
class ToTensor(object):
@ -122,38 +122,45 @@ class RandomPatch(object):
class AugMix(object):
""" Perform AugMix augmentation and compute mixture.
Args:
prob: Probability of applying AugMix
aug_prob_coeff: Probability distribution coefficients.
mixture_width: Number of augmentation chains to mix per augmented example.
mixture_depth: Depth of augmentation chains. -1 denotes stochastic depth in [1, 3].
severity: Severity of underlying augmentation operators (between 1 and 10).
aug_severity: Severity of underlying augmentation operators (between 1 and 10).
"""
def __init__(self, aug_prob_coeff=1, mixture_width=3, mixture_depth=-1, severity=1):
def __init__(self, prob=0.5, aug_prob_coeff=0.1, mixture_width=3, mixture_depth=1, aug_severity=1):
self.prob = prob
self.aug_prob_coeff = aug_prob_coeff
self.mixture_width = mixture_width
self.mixture_depth = mixture_depth
self.severity = severity
self.aug_list = augmentations_reid
self.aug_severity = aug_severity
self.augmentations = augmentations
def __call__(self, image):
"""Perform AugMix augmentations and compute mixture.
Returns:
mixed: Augmented and mixed image.
"""
if random.random() > self.prob:
return np.asarray(image)
ws = np.float32(
np.random.dirichlet([self.aug_prob_coeff] * self.mixture_width))
m = np.float32(np.random.beta(self.aug_prob_coeff, self.aug_prob_coeff))
image = np.asarray(image, dtype=np.float32).copy()
mix = np.zeros_like(image)
h, w = image.shape[0], image.shape[1]
# image = np.asarray(image, dtype=np.float32).copy()
# mix = np.zeros_like(image)
mix = np.zeros([image.size[1], image.size[0], 3])
# h, w = image.shape[0], image.shape[1]
for i in range(self.mixture_width):
image_aug = Image.fromarray(image.copy().astype(np.uint8))
image_aug = image.copy()
# image_aug = Image.fromarray(image.copy().astype(np.uint8))
depth = self.mixture_depth if self.mixture_depth > 0 else np.random.randint(1, 4)
for _ in range(depth):
op = np.random.choice(self.aug_list)
image_aug = op(image_aug, self.severity, (w, h))
mix += ws[i] * np.asarray(image_aug, dtype=np.float32)
op = np.random.choice(self.augmentations)
image_aug = op(image_aug, self.aug_severity)
mix += ws[i] * np.asarray(image_aug)
mixed = (1 - m) * image + m * mix
return mixed
return mixed.astype(np.uint8)
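The rewritten `AugMix` is now applied with probability `prob`, draws per-chain weights from a Dirichlet and the final blend weight from a Beta (both governed by `aug_prob_coeff`), and returns a `uint8` array. A minimal sketch of just the mixing arithmetic on a dummy image, with the augmentation chains stubbed out:

```python
import numpy as np

aug_prob_coeff, mixture_width = 0.1, 3
image = np.random.randint(0, 256, size=(256, 128, 3)).astype(np.float32)

# per-chain weights sum to 1; m blends the original image with the mixture
ws = np.float32(np.random.dirichlet([aug_prob_coeff] * mixture_width))
m = np.float32(np.random.beta(aug_prob_coeff, aug_prob_coeff))

mix = np.zeros_like(image)
for w in ws:
    image_aug = image.copy()  # a real chain applies mixture_depth random ops from `augmentations`
    mix += w * image_aug

mixed = (1 - m) * image + m * mix
print(mixed.astype(np.uint8).shape, ws.round(3), round(float(m), 3))
```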

View File

@ -233,8 +233,7 @@ class DefaultTrainer(TrainerBase):
model, data_loader, optimizer
)
self.iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
self.scheduler = self.build_lr_scheduler(cfg, optimizer, self.iters_per_epoch)
self.scheduler = self.build_lr_scheduler(cfg, optimizer)
# Assume no other objects need to be checkpointed.
# We can later make it checkpoint the stateful hooks
@ -246,16 +245,13 @@ class DefaultTrainer(TrainerBase):
**optimizer_ckpt,
**self.scheduler,
)
self.iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
self.start_epoch = 0
# if cfg.SOLVER.SWA.ENABLED:
# self.max_iter = cfg.SOLVER.MAX_ITER + cfg.SOLVER.SWA.ITER
# else:
# self.max_iter = cfg.SOLVER.MAX_ITER
self.max_epoch = cfg.SOLVER.MAX_EPOCH
self.max_iter = self.max_epoch * self.iters_per_epoch
self.warmup_iters = cfg.SOLVER.WARMUP_ITERS
self.warmup_epochs = cfg.SOLVER.WARMUP_EPOCHS
self.delay_epochs = cfg.SOLVER.DELAY_EPOCHS
self.cfg = cfg
@ -413,15 +409,11 @@ class DefaultTrainer(TrainerBase):
return build_optimizer(cfg, model)
@classmethod
def build_lr_scheduler(cls, cfg, optimizer, iters_per_epoch):
def build_lr_scheduler(cls, cfg, optimizer):
"""
It now calls :func:`fastreid.solver.build_lr_scheduler`.
Overwrite it if you'd like a different scheduler.
"""
cfg = cfg.clone()
cfg.defrost()
cfg.SOLVER.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH - max(
math.ceil(cfg.SOLVER.WARMUP_ITERS / iters_per_epoch), cfg.SOLVER.DELAY_EPOCHS)
return build_lr_scheduler(cfg, optimizer)
@classmethod
@ -429,7 +421,7 @@ class DefaultTrainer(TrainerBase):
"""
Returns:
iterable
It now calls :func:`fastreid.data.build_detection_train_loader`.
It now calls :func:`fastreid.data.build_reid_train_loader`.
Overwrite it if you'd like a different data loader.
"""
logger = logging.getLogger(__name__)
@ -441,7 +433,7 @@ class DefaultTrainer(TrainerBase):
"""
Returns:
iterable
It now calls :func:`fastreid.data.build_detection_test_loader`.
It now calls :func:`fastreid.data.build_reid_test_loader`.
Overwrite it if you'd like a different data loader.
"""
return build_reid_test_loader(cfg, dataset_name)

View File

@ -250,14 +250,11 @@ class LRScheduler(HookBase):
lr = self._optimizer.param_groups[self._best_param_group_id]["lr"]
self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False)
next_iter = self.trainer.iter + 1
if next_iter < self.trainer.warmup_iters:
self._scheduler["warmup_sched"].step()
def after_epoch(self):
next_iter = self.trainer.iter
next_epoch = self.trainer.epoch + 1
if next_iter >= self.trainer.warmup_iters and next_epoch >= self.trainer.delay_epochs:
if next_epoch <= self.trainer.warmup_epochs:
self._scheduler["warmup_sched"].step()
elif next_epoch >= self.trainer.delay_epochs:
self._scheduler["lr_sched"].step()
@ -459,7 +456,6 @@ class LayerFreeze(HookBase):
self.fc_freeze_iters = fc_freeze_iters
self.is_frozen = False
self.fc_frozen = False
def before_step(self):

View File

@ -236,14 +236,7 @@ class SimpleTrainer(TrainerBase):
If you want to do something with the heads, you can wrap the model.
"""
outs = self.model(data)
# Compute loss
if isinstance(self.model, DistributedDataParallel):
loss_dict = self.model.module.losses(outs)
else:
loss_dict = self.model.losses(outs)
loss_dict = self.model(data)
losses = sum(loss_dict.values())
"""
@ -251,6 +244,7 @@ class SimpleTrainer(TrainerBase):
wrap the optimizer with your custom `zero_grad()` method.
"""
self.optimizer.zero_grad()
losses.backward()
self._write_metrics(loss_dict, data_time)
@ -308,6 +302,7 @@ class AMPTrainer(SimpleTrainer):
Like :class:`SimpleTrainer`, but uses apex automatic mixed precision
in the training loop.
"""
def run_step(self):
"""
Implement the AMP training logic.
@ -319,14 +314,7 @@ class AMPTrainer(SimpleTrainer):
data = next(self._data_loader_iter)
data_time = time.perf_counter() - start
outs = self.model(data)
# Compute loss
if isinstance(self.model, DistributedDataParallel):
loss_dict = self.model.module.losses(outs)
else:
loss_dict = self.model.losses(outs)
loss_dict = self.model(data)
losses = sum(loss_dict.values())
self.optimizer.zero_grad()

View File

@ -6,19 +6,18 @@
import copy
import logging
from collections import OrderedDict
from sklearn import metrics
import numpy as np
import torch
import torch.nn.functional as F
from sklearn import metrics
from fastreid.utils import comm
from fastreid.utils.compute_dist import build_dist
from .evaluator import DatasetEvaluator
from .query_expansion import aqe
from .rank import evaluate_rank
from .rerank import re_ranking
from .roc import evaluate_roc
from fastreid.utils import comm
from fastreid.utils.compute_dist import build_dist
logger = logging.getLogger(__name__)
@ -103,10 +102,10 @@ class ReidEvaluator(DatasetEvaluator):
mAP = np.mean(all_AP)
mINP = np.mean(all_INP)
for r in [1, 5, 10]:
self._results['Rank-{}'.format(r)] = cmc[r - 1]
self._results['mAP'] = mAP
self._results['mINP'] = mINP
self._results["metric"] = (mAP + cmc[0]) / 2
self._results['Rank-{}'.format(r)] = cmc[r - 1] * 100
self._results['mAP'] = mAP * 100
self._results['mINP'] = mINP * 100
self._results["metric"] = (mAP + cmc[0]) / 2 * 100
if self.cfg.TEST.ROC_ENABLED:
scores, labels = evaluate_roc(dist, query_pids, gallery_pids, query_camids, gallery_camids)

View File

@ -30,7 +30,7 @@ def print_csv_format(results):
table = tabulate(
csv_results,
tablefmt="pipe",
floatfmt=".2%",
floatfmt=".2f",
headers=metrics,
numalign="left",
)

View File

@ -20,6 +20,8 @@ class ArcSoftmax(nn.Module):
self.s = cfg.MODEL.HEADS.SCALE
self.m = cfg.MODEL.HEADS.MARGIN
self.easy_margin = False
self.cos_m = math.cos(self.m)
self.sin_m = math.sin(self.m)
self.threshold = math.cos(math.pi - self.m)
@ -30,26 +32,18 @@ class ArcSoftmax(nn.Module):
self.register_buffer('t', torch.zeros(1))
def forward(self, features, targets):
# get cos(theta)
cos_theta = F.linear(F.normalize(features), F.normalize(self.weight))
cos_theta = cos_theta.clamp(-1, 1) # for numerical stability
target_logit = cos_theta[torch.arange(0, features.size(0)), targets].view(-1, 1)
sin_theta = torch.sqrt(1.0 - torch.pow(target_logit, 2))
cos_theta_m = target_logit * self.cos_m - sin_theta * self.sin_m # cos(target+margin)
mask = cos_theta > cos_theta_m
final_target_logit = torch.where(target_logit > self.threshold,
cos_theta_m.to(target_logit),
target_logit - self.mm)
hard_example = cos_theta[mask]
with torch.no_grad():
self.t = target_logit.mean() * 0.01 + (1 - 0.01) * self.t
cos_theta[mask] = hard_example * (self.t + hard_example).to(hard_example.dtype)
cos_theta.scatter_(1, targets.view(-1, 1).long(), final_target_logit)
pred_class_logits = cos_theta * self.s
return pred_class_logits
cosine = F.linear(F.normalize(features), F.normalize(self.weight))
sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
phi = cosine * self.cos_m - sine * self.sin_m # cos(theta + m)
if self.easy_margin:
phi = torch.where(cosine > 0, phi, cosine)
else:
phi = torch.where(cosine > self.threshold, phi, cosine - self.mm)
one_hot = torch.zeros(cosine.size(), device=cosine.device)
one_hot.scatter_(1, targets.view(-1, 1).long(), 1)
output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
output *= self.s
return output
def extra_repr(self):
return 'in_features={}, num_classes={}, scale={}, margin={}'.format(
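The rewritten `ArcSoftmax.forward` follows the standard additive-angular-margin (ArcFace) recipe: compute `cos(theta)`, derive `cos(theta + m)`, substitute it only at the ground-truth class, then scale by `s`. A minimal numeric sketch of that margin step with toy logits; the values are illustrative, and `mm` follows the usual ArcFace definition, which is set in `__init__` outside this hunk:

```python
import math
import torch

s, m = 64.0, 0.35                                # SCALE and MARGIN from the configs above
cos_m, sin_m = math.cos(m), math.sin(m)
threshold, mm = math.cos(math.pi - m), math.sin(math.pi - m) * m

cosine = torch.tensor([[0.9, 0.1, -0.2],         # cos(theta) for 2 samples, 3 classes
                       [0.2, 0.7,  0.0]])
targets = torch.tensor([0, 1])

sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
phi = cosine * cos_m - sine * sin_m              # cos(theta + m)
phi = torch.where(cosine > threshold, phi, cosine - mm)   # easy_margin=False branch

one_hot = torch.zeros_like(cosine)
one_hot.scatter_(1, targets.view(-1, 1), 1)
logits = (one_hot * phi + (1.0 - one_hot) * cosine) * s   # margin only at the target class
print(logits)
```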

View File

@ -80,7 +80,7 @@ class GhostBatchNorm(BatchNorm):
self.weight, self.bias, False, self.momentum, self.eps)
class FrozenBatchNorm(BatchNorm):
class FrozenBatchNorm(nn.Module):
"""
BatchNorm2d where the batch statistics and the affine parameters are fixed.
It contains non-trainable buffers called
@ -99,9 +99,13 @@ class FrozenBatchNorm(BatchNorm):
_version = 3
def __init__(self, num_features, eps=1e-5, **kwargs):
super().__init__(num_features, weight_freeze=True, bias_freeze=True, **kwargs)
super().__init__()
self.num_features = num_features
self.eps = eps
self.register_buffer("weight", torch.ones(num_features))
self.register_buffer("bias", torch.zeros(num_features))
self.register_buffer("running_mean", torch.zeros(num_features))
self.register_buffer("running_var", torch.ones(num_features) - eps)
def forward(self, x):
if x.requires_grad:
@ -198,9 +202,9 @@ def get_norm(norm, out_channels, **kwargs):
return None
norm = {
"BN": BatchNorm,
"syncBN": SyncBatchNorm,
"GhostBN": GhostBatchNorm,
"FrozenBN": FrozenBatchNorm,
"GN": lambda channels, **args: nn.GroupNorm(32, channels),
"syncBN": SyncBatchNorm,
}[norm]
return norm(out_channels, **kwargs)

View File

@ -11,3 +11,4 @@ from .osnet import build_osnet_backbone
from .resnest import build_resnest_backbone
from .resnext import build_resnext_backbone
from .regnet import build_regnet_backbone, build_effnet_backbone
from .shufflenet import build_shufflenetv2_backbone

View File

@ -183,6 +183,7 @@ class ResNet(nn.Module):
x = self.relu(x)
x = self.maxpool(x)
# layer 1
NL1_counter = 0
if len(self.NL_1_idx) == 0:
self.NL_1_idx = [-1]
@ -192,7 +193,7 @@ class ResNet(nn.Module):
_, C, H, W = x.shape
x = self.NL_1[NL1_counter](x)
NL1_counter += 1
# Layer 2
# layer 2
NL2_counter = 0
if len(self.NL_2_idx) == 0:
self.NL_2_idx = [-1]
@ -202,7 +203,8 @@ class ResNet(nn.Module):
_, C, H, W = x.shape
x = self.NL_2[NL2_counter](x)
NL2_counter += 1
# Layer 3
# layer 3
NL3_counter = 0
if len(self.NL_3_idx) == 0:
self.NL_3_idx = [-1]
@ -212,7 +214,8 @@ class ResNet(nn.Module):
_, C, H, W = x.shape
x = self.NL_3[NL3_counter](x)
NL3_counter += 1
# Layer 4
# layer 4
NL4_counter = 0
if len(self.NL_4_idx) == 0:
self.NL_4_idx = [-1]

View File

@ -0,0 +1,203 @@
"""
Author: Guan'an Wang
Contact: guan.wang0706@gmail.com
"""
import torch
from torch import nn
from collections import OrderedDict
import logging
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from fastreid.layers import get_norm
from fastreid.modeling.backbones import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
class ShuffleV2Block(nn.Module):
"""
Reference:
https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2
"""
def __init__(self, bn_norm, inp, oup, mid_channels, *, ksize, stride):
super(ShuffleV2Block, self).__init__()
self.stride = stride
assert stride in [1, 2]
self.mid_channels = mid_channels
self.ksize = ksize
pad = ksize // 2
self.pad = pad
self.inp = inp
outputs = oup - inp
branch_main = [
# pw
nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False),
get_norm(bn_norm, mid_channels),
nn.ReLU(inplace=True),
# dw
nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False),
get_norm(bn_norm, mid_channels),
# pw-linear
nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False),
get_norm(bn_norm, outputs),
nn.ReLU(inplace=True),
]
self.branch_main = nn.Sequential(*branch_main)
if stride == 2:
branch_proj = [
# dw
nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False),
get_norm(bn_norm, inp),
# pw-linear
nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
get_norm(bn_norm, inp),
nn.ReLU(inplace=True),
]
self.branch_proj = nn.Sequential(*branch_proj)
else:
self.branch_proj = None
def forward(self, old_x):
if self.stride == 1:
x_proj, x = self.channel_shuffle(old_x)
return torch.cat((x_proj, self.branch_main(x)), 1)
elif self.stride == 2:
x_proj = old_x
x = old_x
return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
def channel_shuffle(self, x):
batchsize, num_channels, height, width = x.data.size()
assert (num_channels % 4 == 0)
x = x.reshape(batchsize * num_channels // 2, 2, height * width)
x = x.permute(1, 0, 2)
x = x.reshape(2, -1, num_channels // 2, height, width)
return x[0], x[1]
class ShuffleNetV2(nn.Module):
"""
Reference:
https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2
"""
def __init__(self, bn_norm, model_size='1.5x'):
super(ShuffleNetV2, self).__init__()
self.stage_repeats = [4, 8, 4]
self.model_size = model_size
if model_size == '0.5x':
self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif model_size == '1.0x':
self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif model_size == '1.5x':
self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif model_size == '2.0x':
self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]
else:
raise NotImplementedError
# building first layer
input_channel = self.stage_out_channels[1]
self.first_conv = nn.Sequential(
nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
get_norm(bn_norm, input_channel),
nn.ReLU(inplace=True),
)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.features = []
for idxstage in range(len(self.stage_repeats)):
numrepeat = self.stage_repeats[idxstage]
output_channel = self.stage_out_channels[idxstage + 2]
for i in range(numrepeat):
if i == 0:
self.features.append(ShuffleV2Block(bn_norm, input_channel, output_channel,
mid_channels=output_channel // 2, ksize=3, stride=2))
else:
self.features.append(ShuffleV2Block(bn_norm, input_channel // 2, output_channel,
mid_channels=output_channel // 2, ksize=3, stride=1))
input_channel = output_channel
self.features = nn.Sequential(*self.features)
self.conv_last = nn.Sequential(
nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
get_norm(bn_norm, self.stage_out_channels[-1]),
nn.ReLU(inplace=True)
)
self._initialize_weights()
def forward(self, x):
x = self.first_conv(x)
x = self.maxpool(x)
x = self.features(x)
x = self.conv_last(x)
return x
def _initialize_weights(self):
for name, m in self.named_modules():
if isinstance(m, nn.Conv2d):
if 'first' in name:
nn.init.normal_(m.weight, 0, 0.01)
else:
nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0.0001)
nn.init.constant_(m.running_mean, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0.0001)
nn.init.constant_(m.running_mean, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
@BACKBONE_REGISTRY.register()
def build_shufflenetv2_backbone(cfg):
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
bn_norm = cfg.MODEL.BACKBONE.NORM
model_size = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
model = ShuffleNetV2(bn_norm, model_size=model_size)
if pretrain:
new_state_dict = OrderedDict()
state_dict = torch.load(pretrain_path)["state_dict"]
for k, v in state_dict.items():
if k[:7] == 'module.':
k = k[7:]
new_state_dict[k] = v
incompatible = model.load_state_dict(new_state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
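The stride-1 branch of `ShuffleV2Block` relies on `channel_shuffle` to split the channels into two interleaved halves, so the two branches exchange information between blocks. A minimal sketch of the same reshape/permute sequence on a dummy tensor:

```python
import torch

def channel_shuffle(x):
    # same reshape/permute sequence as ShuffleV2Block.channel_shuffle above
    batchsize, num_channels, height, width = x.size()
    assert num_channels % 4 == 0
    x = x.reshape(batchsize * num_channels // 2, 2, height * width)
    x = x.permute(1, 0, 2)
    x = x.reshape(2, -1, num_channels // 2, height, width)
    return x[0], x[1]

x = torch.randn(8, 116, 32, 16)            # e.g. the first stage of the 1.0x model
x_proj, x_main = channel_shuffle(x)
print(x_proj.shape, x_main.shape)          # both torch.Size([8, 58, 32, 16])
```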

View File

@ -8,4 +8,3 @@ from .build import REID_HEADS_REGISTRY, build_heads
# import all the meta_arch, so they will be registered
from .embedding_head import EmbeddingHead
from .attr_head import AttrHead

View File

@ -16,9 +16,9 @@ The call is expected to return an :class:`ROIHeads`.
"""
def build_heads(cfg):
def build_heads(cfg, **kwargs):
"""
Build the REID heads defined by `cfg.MODEL.HEADS.NAME`.
"""
head = cfg.MODEL.HEADS.NAME
return REID_HEADS_REGISTRY.get(head)(cfg)
return REID_HEADS_REGISTRY.get(head)(cfg, **kwargs)

View File

@ -50,7 +50,7 @@ class EmbeddingHead(nn.Module):
self.bottleneck = nn.Sequential(*bottleneck)
# identity classification layer
# classification layer
# fmt: off
if cls_type == 'linear': self.classifier = nn.Linear(feat_dim, num_classes, bias=False)
elif cls_type == 'arcSoftmax': self.classifier = ArcSoftmax(cfg, feat_dim, num_classes)

View File

@ -10,3 +10,5 @@ from .build import META_ARCH_REGISTRY, build_model
# import all the meta_arch, so they will be registered
from .baseline import Baseline
from .mgn import MGN
from .moco import MoCo
from .distiller import Distiller

View File

@ -46,10 +46,8 @@ class Baseline(nn.Module):
if targets.sum() < 0: targets.zero_()
outputs = self.heads(features, targets)
return {
"outputs": outputs,
"targets": targets,
}
losses = self.losses(outputs, targets)
return losses
else:
outputs = self.heads(features)
return outputs
@ -68,15 +66,13 @@ class Baseline(nn.Module):
images.sub_(self.pixel_mean).div_(self.pixel_std)
return images
def losses(self, outs):
def losses(self, outputs, gt_labels):
r"""
Compute losses from the model's outputs; the loss function's input arguments
must match the outputs of the model's forward pass.
"""
# fmt: off
outputs = outs["outputs"]
gt_labels = outs["targets"]
# model predictions
# fmt: off
pred_class_logits = outputs['pred_class_logits'].detach()
cls_outputs = outputs['cls_outputs']
pred_features = outputs['features']

View File

@ -15,12 +15,12 @@ and expected to return a `nn.Module` object.
"""
def build_model(cfg):
def build_model(cfg, **kwargs):
"""
Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``.
Note that it does not load any weights from ``cfg``.
"""
meta_arch = cfg.MODEL.META_ARCHITECTURE
model = META_ARCH_REGISTRY.get(meta_arch)(cfg)
model = META_ARCH_REGISTRY.get(meta_arch)(cfg, **kwargs)
model.to(torch.device(cfg.MODEL.DEVICE))
return model

View File

@ -0,0 +1,88 @@
# encoding: utf-8
"""
@author: l1aoxingyu
@contact: sherlockliao01@gmail.com
"""
import logging
import torch
import torch.nn.functional as F
from fastreid.config import get_cfg
from fastreid.modeling.meta_arch import META_ARCH_REGISTRY, build_model, Baseline
from fastreid.utils.checkpoint import Checkpointer
logger = logging.getLogger(__name__)
@META_ARCH_REGISTRY.register()
class Distiller(Baseline):
def __init__(self, cfg):
super(Distiller, self).__init__(cfg)
# Get teacher model config
cfg_t = get_cfg()
cfg_t.merge_from_file(cfg.KD.MODEL_CONFIG)
model_t = build_model(cfg_t)
logger.info("Teacher model:\n{}".format(model_t))
# No gradients for teacher model
for param in model_t.parameters():
param.requires_grad_(False)
logger.info("Loading teacher model weights ...")
Checkpointer(model_t).load(cfg.KD.MODEL_WEIGHTS)
# Do not register the teacher model as an `nn.Module`, so that
# its weights are not saved together with the student
self.model_t = [model_t.backbone, model_t.heads]
def forward(self, batched_inputs):
if self.training:
images = self.preprocess_image(batched_inputs)
# student model forward
s_feat = self.backbone(images)
assert "targets" in batched_inputs, "Labels are missing in training!"
targets = batched_inputs["targets"].to(self.device)
if targets.sum() < 0: targets.zero_()
s_outputs = self.heads(s_feat, targets)
# teacher model forward
with torch.no_grad():
t_feat = self.model_t[0](images)
t_outputs = self.model_t[1](t_feat, targets)
losses = self.losses(s_outputs, t_outputs, targets)
return losses
# Eval mode, just conventional reid feature extraction
else:
return super(Distiller, self).forward(batched_inputs)
def losses(self, s_outputs, t_outputs, gt_labels):
r"""
Compute losses from the model's outputs; the loss function's input arguments
must match the outputs of the model's forward pass.
"""
loss_dict = super(Distiller, self).losses(s_outputs, gt_labels)
s_logits = s_outputs["pred_class_logits"]
t_logits = t_outputs["pred_class_logits"].detach()
loss_dict["loss_jsdiv"] = self.jsdiv_loss(s_logits, t_logits)
return loss_dict
@staticmethod
def _kldiv(y_s, y_t, t):
p_s = F.log_softmax(y_s / t, dim=1)
p_t = F.softmax(y_t / t, dim=1)
loss = F.kl_div(p_s, p_t, reduction="sum") * (t ** 2) / y_s.shape[0]
return loss
def jsdiv_loss(self, y_s, y_t, t=16):
loss = (self._kldiv(y_s, y_t, t) + self._kldiv(y_t, y_s, t)) / 2
return loss
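The extra term `Distiller` adds on top of the regular ReID losses is a temperature-scaled, symmetrized KL divergence between student and teacher logits. A minimal self-contained sketch of `_kldiv`/`jsdiv_loss` on dummy logits; the class count is arbitrary here:

```python
import torch
import torch.nn.functional as F

def kldiv(y_s, y_t, t):
    # same computation as Distiller._kldiv
    p_s = F.log_softmax(y_s / t, dim=1)
    p_t = F.softmax(y_t / t, dim=1)
    return F.kl_div(p_s, p_t, reduction="sum") * (t ** 2) / y_s.shape[0]

def jsdiv_loss(y_s, y_t, t=16):
    return (kldiv(y_s, y_t, t) + kldiv(y_t, y_s, t)) / 2

s_logits = torch.randn(4, 751)                 # student predictions
t_logits = torch.randn(4, 751)                 # teacher predictions (detached in the model)
print(jsdiv_loss(s_logits, t_logits).item())   # > 0 when the two disagree
print(jsdiv_loss(s_logits, s_logits).item())   # 0 when they agree
```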

View File

@ -111,17 +111,11 @@ class MGN(nn.Module):
b32_outputs = self.b32_head(b32_feat, targets)
b33_outputs = self.b33_head(b33_feat, targets)
return {
"b1_outputs": b1_outputs,
"b2_outputs": b2_outputs,
"b21_outputs": b21_outputs,
"b22_outputs": b22_outputs,
"b3_outputs": b3_outputs,
"b31_outputs": b31_outputs,
"b32_outputs": b32_outputs,
"b33_outputs": b33_outputs,
"targets": targets,
}
losses = self.losses(b1_outputs,
b2_outputs, b21_outputs, b22_outputs,
b3_outputs, b31_outputs, b32_outputs, b33_outputs,
targets)
return losses
else:
b1_pool_feat = self.b1_head(b1_feat)
b2_pool_feat = self.b2_head(b2_feat)
@ -150,18 +144,12 @@ class MGN(nn.Module):
images.sub_(self.pixel_mean).div_(self.pixel_std)
return images
def losses(self, outs):
# fmt: off
b1_outputs = outs["b1_outputs"]
b2_outputs = outs["b2_outputs"]
b21_outputs = outs["b21_outputs"]
b22_outputs = outs["b22_outputs"]
b3_outputs = outs["b3_outputs"]
b31_outputs = outs["b31_outputs"]
b32_outputs = outs["b32_outputs"]
b33_outputs = outs["b33_outputs"]
gt_labels = outs["targets"]
def losses(self,
b1_outputs,
b2_outputs, b21_outputs, b22_outputs,
b3_outputs, b31_outputs, b32_outputs, b33_outputs, gt_labels):
# model predictions
# fmt: off
pred_class_logits = b1_outputs['pred_class_logits'].detach()
b1_logits = b1_outputs['cls_outputs']
b2_logits = b2_outputs['cls_outputs']

View File

@ -0,0 +1,126 @@
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
import torch
import torch.nn.functional as F
from torch import nn
from fastreid.modeling.losses.utils import concat_all_gather
from fastreid.utils import comm
from .baseline import Baseline
from .build import META_ARCH_REGISTRY
@META_ARCH_REGISTRY.register()
class MoCo(Baseline):
def __init__(self, cfg):
super(MoCo, self).__init__(cfg)
dim = cfg.MODEL.HEADS.EMBEDDING_DIM if cfg.MODEL.HEADS.EMBEDDING_DIM \
else cfg.MODEL.BACKBONE.FEAT_DIM
size = cfg.MODEL.QUEUE_SIZE
self.memory = Memory(dim, size)
def losses(self, outputs, gt_labels):
"""
Compute losses from the model's outputs; the loss function's input arguments
must match the outputs of the model's forward pass.
"""
# reid loss
loss_dict = super(MoCo, self).losses(outputs, gt_labels)
# memory loss
pred_features = outputs['features']
loss_mb = self.memory(pred_features, gt_labels)
loss_dict["loss_mb"] = loss_mb
return loss_dict
class Memory(nn.Module):
"""
Build a MoCo memory with a queue
https://arxiv.org/abs/1911.05722
"""
def __init__(self, dim=512, K=65536):
"""
dim: feature dimension (default: 128)
K: queue size; number of negative keys (default: 65536)
"""
super().__init__()
self.K = K
self.margin = 0.25
self.gamma = 32
# create the queue
self.register_buffer("queue", torch.randn(dim, K))
self.queue = F.normalize(self.queue, dim=0)
self.register_buffer("queue_label", torch.zeros((1, K), dtype=torch.long))
self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))
@torch.no_grad()
def _dequeue_and_enqueue(self, keys, targets):
# gather keys/targets before updating queue
if comm.get_world_size() > 1:
keys = concat_all_gather(keys)
targets = concat_all_gather(targets)
else:
keys = keys.detach()
targets = targets.detach()
batch_size = keys.shape[0]
ptr = int(self.queue_ptr)
assert self.K % batch_size == 0 # for simplicity
# replace the keys at ptr (dequeue and enqueue)
self.queue[:, ptr:ptr + batch_size] = keys.T
self.queue_label[:, ptr:ptr + batch_size] = targets
ptr = (ptr + batch_size) % self.K # move pointer
self.queue_ptr[0] = ptr
def forward(self, feat_q, targets):
"""
Memory bank enqueue and compute metric loss
Args:
feat_q: model features
targets: gt labels
Returns:
"""
# normalize embedding features
feat_q = F.normalize(feat_q, p=2, dim=1)
# dequeue and enqueue
self._dequeue_and_enqueue(feat_q.detach(), targets)
# compute loss
loss = self._pairwise_cosface(feat_q, targets)
return loss
def _pairwise_cosface(self, feat_q, targets):
dist_mat = torch.matmul(feat_q, self.queue)
N, M = dist_mat.size() # (bsz, memory)
is_pos = targets.view(N, 1).expand(N, M).eq(self.queue_label.expand(N, M)).float()
is_neg = targets.view(N, 1).expand(N, M).ne(self.queue_label.expand(N, M)).float()
# Mask scores related to themselves
same_indx = torch.eye(N, N, device=is_pos.device)
other_indx = torch.zeros(N, M - N, device=is_pos.device)
same_indx = torch.cat((same_indx, other_indx), dim=1)
is_pos = is_pos - same_indx
s_p = dist_mat * is_pos
s_n = dist_mat * is_neg
logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)
loss = F.softplus(torch.logsumexp(logit_p, dim=1) + torch.logsumexp(logit_n, dim=1)).mean()
return loss
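The `Memory` queue works as a ring buffer: every batch of L2-normalized features (and their labels) overwrites the oldest slots and the pointer wraps around, which is why the queue size must be divisible by the batch size. A minimal sketch of just that bookkeeping, with small illustrative sizes:

```python
import torch
import torch.nn.functional as F

dim, K, batch_size = 8, 32, 4                  # K % batch_size == 0, as the assert requires
queue = F.normalize(torch.randn(dim, K), dim=0)
queue_label = torch.zeros((1, K), dtype=torch.long)
ptr = 0

for step in range(3):
    keys = F.normalize(torch.randn(batch_size, dim), p=2, dim=1)   # model features
    targets = torch.randint(0, 10, (batch_size,))
    # replace the oldest entries at the pointer (dequeue and enqueue)
    queue[:, ptr:ptr + batch_size] = keys.T
    queue_label[:, ptr:ptr + batch_size] = targets
    ptr = (ptr + batch_size) % K
    print("step", step, "-> ptr", ptr)

# the pairwise cosface loss then scores the current batch against the whole queue
dist_mat = torch.matmul(keys, queue)           # (batch_size, K)
print(dist_mat.shape)
```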

View File

@ -23,27 +23,25 @@ def build_optimizer(cfg, model):
params += [{"name": key, "params": [value], "lr": lr, "weight_decay": weight_decay}]
solver_opt = cfg.SOLVER.OPT
# fmt: off
if solver_opt == "SGD": opt_fns = getattr(optim, solver_opt)(params, momentum=cfg.SOLVER.MOMENTUM)
else: opt_fns = getattr(optim, solver_opt)(params)
# fmt: on
if solver_opt == "SGD":
opt_fns = getattr(optim, solver_opt)(
params,
momentum=cfg.SOLVER.MOMENTUM,
nesterov=True if cfg.SOLVER.MOMENTUM and cfg.SOLVER.NESTEROV else False
)
else:
opt_fns = getattr(optim, solver_opt)(params)
return opt_fns
def build_lr_scheduler(cfg, optimizer):
cfg = cfg.clone()
cfg.defrost()
cfg.SOLVER.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH - max(
cfg.SOLVER.WARMUP_EPOCHS + 1, cfg.SOLVER.DELAY_EPOCHS)
scheduler_dict = {}
if cfg.SOLVER.WARMUP_ITERS > 0:
warmup_args = {
"optimizer": optimizer,
# warmup options
"warmup_factor": cfg.SOLVER.WARMUP_FACTOR,
"warmup_iters": cfg.SOLVER.WARMUP_ITERS,
"warmup_method": cfg.SOLVER.WARMUP_METHOD,
}
scheduler_dict["warmup_sched"] = lr_scheduler.WarmupLR(**warmup_args)
scheduler_args = {
"MultiStepLR": {
"optimizer": optimizer,
@ -63,4 +61,15 @@ def build_lr_scheduler(cfg, optimizer):
scheduler_dict["lr_sched"] = getattr(lr_scheduler, cfg.SOLVER.SCHED)(
**scheduler_args[cfg.SOLVER.SCHED])
if cfg.SOLVER.WARMUP_EPOCHS > 0:
warmup_args = {
"optimizer": optimizer,
# warmup options
"warmup_factor": cfg.SOLVER.WARMUP_FACTOR,
"warmup_epochs": cfg.SOLVER.WARMUP_EPOCHS,
"warmup_method": cfg.SOLVER.WARMUP_METHOD,
}
scheduler_dict["warmup_sched"] = lr_scheduler.WarmupLR(**warmup_args)
return scheduler_dict

View File

@ -8,26 +8,25 @@ from typing import List
import torch
from torch.optim.lr_scheduler import *
from torch.optim.lr_scheduler import _LRScheduler
class WarmupLR(_LRScheduler):
class WarmupLR(torch.optim.lr_scheduler._LRScheduler):
def __init__(
self,
optimizer: torch.optim.Optimizer,
warmup_factor: float = 0.1,
warmup_iters: int = 10,
warmup_epochs: int = 10,
warmup_method: str = "linear",
last_epoch: int = -1,
):
self.warmup_factor = warmup_factor
self.warmup_iters = warmup_iters
self.warmup_epochs = warmup_epochs
self.warmup_method = warmup_method
super().__init__(optimizer, last_epoch)
def get_lr(self) -> List[float]:
warmup_factor = _get_warmup_factor_at_iter(
self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
warmup_factor = _get_warmup_factor_at_epoch(
self.warmup_method, self.last_epoch, self.warmup_epochs, self.warmup_factor
)
return [
base_lr * warmup_factor for base_lr in self.base_lrs
@ -38,30 +37,30 @@ class WarmupLR(_LRScheduler):
return self.get_lr()
def _get_warmup_factor_at_iter(
method: str, iter: int, warmup_iters: int, warmup_factor: float
def _get_warmup_factor_at_epoch(
method: str, epoch: int, warmup_epochs: int, warmup_factor: float
) -> float:
"""
Return the learning rate warmup factor at a specific epoch.
See https://arxiv.org/abs/1706.02677 for more details.
Args:
method (str): warmup method; either "constant" or "linear".
iter (int): iteration at which to calculate the warmup factor.
warmup_iters (int): the number of warmup iterations.
epoch (int): epoch at which to calculate the warmup factor.
warmup_epochs (int): the number of warmup epochs.
warmup_factor (float): the base warmup factor (the meaning changes according
to the method used).
Returns:
float: the effective warmup factor at the given epoch.
"""
if iter >= warmup_iters:
if epoch >= warmup_epochs:
return 1.0
if method == "constant":
return warmup_factor
elif method == "linear":
alpha = (1 - iter / warmup_iters) * (1 - warmup_factor)
return 1 - alpha
alpha = epoch / warmup_epochs
return warmup_factor * (1 - alpha) + alpha
elif method == "exp":
return warmup_factor ** (1 - iter / warmup_iters)
return warmup_factor ** (1 - epoch / warmup_epochs)
else:
raise ValueError("Unknown warmup method: {}".format(method))
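Warmup is now expressed in epochs rather than iterations; with the linear method the factor climbs from `warmup_factor` at epoch 0 to 1.0 once `warmup_epochs` is reached. A quick sketch of the resulting schedule, mirroring `_get_warmup_factor_at_epoch` above with the default `WARMUP_FACTOR=0.1` and `WARMUP_EPOCHS=10`:

```python
def warmup_factor_at_epoch(method, epoch, warmup_epochs, warmup_factor):
    # mirrors _get_warmup_factor_at_epoch above
    if epoch >= warmup_epochs:
        return 1.0
    if method == "constant":
        return warmup_factor
    if method == "linear":
        alpha = epoch / warmup_epochs
        return warmup_factor * (1 - alpha) + alpha
    if method == "exp":
        return warmup_factor ** (1 - epoch / warmup_epochs)
    raise ValueError("Unknown warmup method: {}".format(method))

print([round(warmup_factor_at_epoch("linear", e, 10, 0.1), 2) for e in range(12)])
# [0.1, 0.19, 0.28, 0.37, 0.46, 0.55, 0.64, 0.73, 0.82, 0.91, 1.0, 1.0]
```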

View File

@ -1,3 +1,9 @@
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
from .lamb import Lamb
from .swa import SWA
from torch.optim import *

View File

@ -1,116 +0,0 @@
import math
import torch
from torch.optim.optimizer import Optimizer
class Adam(Optimizer):
r"""Implements Adam algorithm.
It has been proposed in `Adam: A Method for Stochastic Optimization`_.
The implementation of the L2 penalty follows changes proposed in
`Decoupled Weight Decay Regularization`_.
Arguments:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float, optional): learning rate (default: 1e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
eps (float, optional): term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
amsgrad (boolean, optional): whether to use the AMSGrad variant of this
algorithm from the paper `On the Convergence of Adam and Beyond`_
(default: False)
.. _Adam\: A Method for Stochastic Optimization:
https://arxiv.org/abs/1412.6980
.. _Decoupled Weight Decay Regularization:
https://arxiv.org/abs/1711.05101
.. _On the Convergence of Adam and Beyond:
https://openreview.net/forum?id=ryQu7f-RZ
"""
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
weight_decay=0, amsgrad=False):
if not 0.0 <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
if not 0.0 <= weight_decay:
raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
defaults = dict(lr=lr, betas=betas, eps=eps,
weight_decay=weight_decay, amsgrad=amsgrad)
super(Adam, self).__init__(params, defaults)
def __setstate__(self, state):
super(Adam, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('amsgrad', False)
@torch.no_grad()
def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
with torch.enable_grad():
loss = closure()
for group in self.param_groups:
if group['freeze']: continue
for p in group['params']:
if p.grad is None:
continue
grad = p.grad
if grad.is_sparse:
raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
amsgrad = group['amsgrad']
state = self.state[p]
# State initialization
if len(state) == 0:
state['step'] = 0
# Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
if amsgrad:
# Maintains max of all exp. moving avg. of sq. grad. values
state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
if amsgrad:
max_exp_avg_sq = state['max_exp_avg_sq']
beta1, beta2 = group['betas']
state['step'] += 1
bias_correction1 = 1 - beta1 ** state['step']
bias_correction2 = 1 - beta2 ** state['step']
if group['weight_decay'] != 0:
grad = grad.add(p, alpha=group['weight_decay'])
# Decay the first and second moment running average coefficient
exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
if amsgrad:
# Maintains the maximum of all 2nd moment running avg. till now
torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
# Use the max. for normalizing running avg. of gradient
denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
else:
denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
step_size = group['lr'] / bias_correction1
p.addcdiv_(exp_avg, denom, value=-step_size)
return loss

View File

@ -68,7 +68,7 @@ class Lamb(Optimizer):
for group in self.param_groups:
for p in group['params']:
if p.grad is None or group['freeze']:
if p.grad is None:
continue
grad = p.grad.data
if grad.is_sparse:

View File

@ -1,104 +0,0 @@
import torch
from torch.optim.optimizer import Optimizer, required
class SGD(Optimizer):
r"""Implements stochastic gradient descent (optionally with momentum).
Nesterov momentum is based on the formula from
`On the importance of initialization and momentum in deep learning`__.
Args:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float): learning rate
momentum (float, optional): momentum factor (default: 0)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
dampening (float, optional): dampening for momentum (default: 0)
nesterov (bool, optional): enables Nesterov momentum (default: False)
Example:
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> optimizer.zero_grad()
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()
__ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
.. note::
The implementation of SGD with Momentum/Nesterov subtly differs from
Sutskever et. al. and implementations in some other frameworks.
Considering the specific case of Momentum, the update can be written as
.. math::
\begin{aligned}
v_{t+1} & = \mu * v_{t} + g_{t+1}, \\
p_{t+1} & = p_{t} - \text{lr} * v_{t+1},
\end{aligned}
where :math:`p`, :math:`g`, :math:`v` and :math:`\mu` denote the
parameters, gradient, velocity, and momentum respectively.
This is in contrast to Sutskever et. al. and
other frameworks which employ an update of the form
.. math::
\begin{aligned}
v_{t+1} & = \mu * v_{t} + \text{lr} * g_{t+1}, \\
p_{t+1} & = p_{t} - v_{t+1}.
\end{aligned}
The Nesterov version is analogously modified.
"""
def __init__(self, params, lr=required, momentum=0, dampening=0,
weight_decay=0, nesterov=False):
if lr is not required and lr < 0.0:
raise ValueError("Invalid learning rate: {}".format(lr))
if momentum < 0.0:
raise ValueError("Invalid momentum value: {}".format(momentum))
if weight_decay < 0.0:
raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
weight_decay=weight_decay, nesterov=nesterov)
if nesterov and (momentum <= 0 or dampening != 0):
raise ValueError("Nesterov momentum requires a momentum and zero dampening")
super(SGD, self).__init__(params, defaults)
def __setstate__(self, state):
super(SGD, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('nesterov', False)
@torch.no_grad()
def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
with torch.enable_grad():
loss = closure()
for group in self.param_groups:
if group['freeze']: continue
weight_decay = group['weight_decay']
momentum = group['momentum']
dampening = group['dampening']
nesterov = group['nesterov']
for p in group['params']:
if p.grad is None:
continue
d_p = p.grad
if weight_decay != 0:
d_p = d_p.add(p, alpha=weight_decay)
if momentum != 0:
param_state = self.state[p]
if 'momentum_buffer' not in param_state:
buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
else:
buf = param_state['momentum_buffer']
buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
if nesterov:
d_p = d_p.add(buf, alpha=momentum)
else:
d_p = buf
p.add_(d_p, alpha=-group['lr'])
return loss
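Note that this removed SGD variant reads a `freeze` flag from each param group (see the `if group['freeze']: continue` line in `step()`) but registers no default for it, so every group has to supply the key. A minimal usage sketch, with a purely hypothetical two-group model, might look like this:

```python
import torch.nn as nn

# hypothetical model, only for illustration
model = nn.ModuleDict({
    "backbone": nn.Linear(2048, 512),
    "head": nn.Linear(512, 751),
})

optimizer = SGD(
    [
        {"params": model["backbone"].parameters(), "freeze": True},   # skipped by step()
        {"params": model["head"].parameters(),     "freeze": False},  # updated normally
    ],
    lr=0.01,
    momentum=0.9,
)
```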

View File

@ -322,20 +322,21 @@ class PeriodicCheckpointer:
additional_state = {"epoch": epoch}
additional_state.update(kwargs)
if (epoch + 1) % self.period == 0 and epoch < self.max_epoch - 1:
self.checkpointer.save(
"model_{:04d}".format(epoch), **additional_state
)
if additional_state["metric"] > self.best_metric:
self.checkpointer.save(
"model_best", **additional_state
)
self.best_metric = additional_state["metric"]
# Save the periodic checkpoint after the best-model save so the last checkpoint stays valid
self.checkpointer.save(
"model_{:04d}".format(epoch), **additional_state
)
if epoch >= self.max_epoch - 1:
self.checkpointer.save("model_final", **additional_state)
if additional_state["metric"] > self.best_metric:
self.checkpointer.save(
"model_best", **additional_state
)
self.checkpointer.save("model_final", **additional_state)
def save(self, name: str, **kwargs: Any):
"""

View File

@ -4,7 +4,6 @@
@contact: sherlockliao01@gmail.com
"""
import math
from torch import nn
__all__ = [
@ -25,7 +24,6 @@ def weights_init_kaiming(m):
nn.init.constant_(m.bias, 0.0)
elif classname.find('BatchNorm') != -1:
if m.affine:
# nn.init.normal_(m.weight, 1.0, 0.02)
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 0.0)
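For reference, init helpers like `weights_init_kaiming` are applied recursively with `nn.Module.apply`. A minimal sketch (not part of the diff; it assumes `weights_init_kaiming`, i.e. the function shown above, is already in scope):

```python
import torch.nn as nn

# a toy block, purely for illustration
block = nn.Sequential(nn.Linear(2048, 512), nn.BatchNorm1d(512))

# nn.Module.apply visits every submodule, so one call initializes the whole block
block.apply(weights_init_kaiming)
```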

View File

@ -1,47 +0,0 @@
# Model Distillation in FastReID
This project provides a training script for small models that achieve
both fast inference and high accuracy.
## Datasets Preparation
- Market1501
- DukeMTMC-reID
- MSMT17
## Training and Evaluation
```shell script
# a demo on DukeMTMC-reID dataset
# please see more in ./configs
# train BagTricksIBN50 as teacher model
python3 projects/DistillReID/train_net.py --config-file projects/DistillReID/configs/DukeMTMC/bot50ibn.yml
# train BagTricksIBN18 as student model
python3 projects/DistillReID/train_net.py --config-file projects/DistillReID/configs/DukeMTMC/KD-bot50ibn-bot18ibn.yml --kd
```
## Experimental Results and Trained Models
### Settings
All experiments are conducted on a single Tesla P40 GPU with:
- CPU: Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz
- GPU: Tesla P40 (Memory: 22919 MB)
### DukeMTMC-reID
<table><thead><tr><th colspan="2" rowspan="2">Rank-1 (mAP) / <br>Q.Time/batch(128)</th><th colspan="4">Student (BagTricks)</th></tr><tr><td>IBN-101</td><td>IBN-50</td><td>IBN-34</td><td>IBN-18</td></tr></thead><tbody><tr><td rowspan="4">Teacher<br>(BagTricks)</td><td>IBN-101</td><td>90.8(80.8)/0.3395s</td><td>90.8(81.1)/0.1984s</td><td>89.63(78.9)/0.1760s</td><td>86.96(75.75)/0.0854s</td></tr><tr><td>IBN-50</td><td>-</td><td>89.8(79.8)/0.2264s</td><td>88.82(78.9)/0.1761s</td><td>87.75(76.18)/0.0838s</td></tr><tr><td>IBN-34</td><td>-</td><td>-</td><td>88.64(76.4)/0.1766s</td><td>87.43(75.66)/0.0845s</td></tr><tr><td>IBN-18</td><td>-</td><td>-</td><td>-</td><td>85.50(71.60)/0.9178s</td></tr></tbody></table>
### Market-1501
<table><thead><tr><th colspan="2" rowspan="2">Rank-1 (mAP) / <br>Q.Time/batch(128)</th><th colspan="4">Student (BagTricks)</th></tr><tr><td>IBN-101</td><td>IBN-50</td><td>IBN-34</td><td>IBN-18</td></tr></thead><tbody><tr><td rowspan="4">Teacher<br>(BagTricks)</td><td>IBN-101</td><td>95.43(88.95)/0.2698s</td><td>95.19(89.52)/0.1791s</td><td>94.51(87.82)/0.0869s</td><td>93.85(85.77)/0.0612s</td></tr><tr><td>IBN-50</td><td>-</td><td>95.25(88.16)/0.1823s</td><td>95.13(87.28)/0.0863s</td><td>94.18(85.81)/0.0614s</td></tr><tr><td>IBN-34</td><td></td><td>-</td><td>94.63(84.91)/0.0860s</td><td>93.71(85.20)/0.0620s</td></tr><tr><td>IBN-18</td><td>-</td><td>-</td><td>-</td><td>92.87(81.22)/0.0615s</td></tr><tr><td colspan="2">Average Q.Time</td><td>0.2698s</td><td>0.1807s</td><td>0.0864s</td><td>0.0616s</td></tr></tbody></table>
### MSMT17
<table><thead><tr><th colspan="2" rowspan="2">Rank-1 (mAP) / <br>Q.Time/batch(128)</th><th colspan="4">Student (BagTricks)</th></tr><tr><td>IBN-101</td><td>IBN-50</td><td>IBN-34</td><td>IBN-18</td></tr></thead><tbody><tr><td rowspan="4">Teacher<br>(BagTricks)</td><td>IBN-101</td><td>81.95(60.51)/0.2693s</td><td>82.37(62.08)/0.1792s</td><td>81.07(58.56)/0.0872s</td><td>77.77(52.77)/0.0610s</td></tr><tr><td>IBN-50</td><td>-</td><td>80.18(57.80)/0.1789s</td><td>81.28(58.27)/0.0863s</td><td>78.11(53.10)/0.0623s</td></tr><tr><td>IBN-34</td><td></td><td>-</td><td>78.27(53.41)/0.0873s</td><td>77.65(52.82)/0.0615s</td></tr><tr><td>IBN-18</td><td>-</td><td>-</td><td>-</td><td>74.11(47.26)/0.0621s</td></tr><tr><td colspan="2">Average Q.Time</td><td>0.2693s</td><td>0.1801s</td><td>0.0868s</td><td>0.0617s</td></tr></tbody></table>
## Contact
This project is conducted by [Guan'an Wang](https://wangguanan.github.io/) (guan.wang0706@gmail.com) and [Xingyu Liao](https://github.com/L1aoXingyu).

View File

@ -1,30 +0,0 @@
_BASE_: "../../../configs/Base-bagtricks.yml"
MODEL_TEACHER:
META_ARCHITECTURE: "Baseline"
BACKBONE:
NAME: "build_resnet_backbone"
NORM: "BN"
DEPTH: "101x"
FEAT_DIM: 2048
LAST_STRIDE: 1
WITH_IBN: True
PRETRAIN: True
HEADS:
NAME: "EmbeddingHead"
NORM: "BN"
POOL_LAYER: "avgpool"
NECK_FEAT: "before"
CLS_LAYER: "linear"
MODEL:
BACKBONE:
NAME: "build_resnet_backbone"
DEPTH: "50x"
FEAT_DIM: 2048
WITH_IBN: True
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "logs/dukemtmc/bagtricks_R34-ibn/model_final.pth"

View File

@ -1,37 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"
MODEL_TEACHER:
META_ARCHITECTURE: "Baseline"
BACKBONE:
NAME: "build_resnet_backbone"
NORM: "BN"
DEPTH: "101x"
FEAT_DIM: 2048
LAST_STRIDE: 1
WITH_NL: False
WITH_IBN: True
PRETRAIN: True
HEADS:
NAME: "EmbeddingHead"
NORM: "BN"
NECK_FEAT: "after"
POOL_LAYER: "gempoolP"
CLS_LAYER: "circleSoftmax"
SCALE: 64
MARGIN: 0.35
MODEL:
BACKBONE:
NAME: "build_resnet_backbone"
DEPTH: "50x"
FEAT_DIM: 2048
WITH_IBN: True
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "logs/dukemtmc/bagtricks_R34-ibn/model_final.pth"
INPUT:
SIZE_TRAIN: [ 256, 128 ]
SIZE_TEST: [ 256, 128 ]

View File

@ -1,20 +0,0 @@
_BASE_: "../Base-bot-kd.yml"
MODEL_TEACHER:
BACKBONE:
DEPTH: "101x"
FEAT_DIM: 2048
MODEL:
BACKBONE:
DEPTH: "18x"
FEAT_DIM: 512
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/bagtricks_R101-ibn"
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bot101ibn-kd-bot18ibn"

View File

@ -1,20 +0,0 @@
_BASE_: "../Base-bot-kd.yml"
MODEL_TEACHER:
BACKBONE:
DEPTH: "101x"
FEAT_DIM: 2048
MODEL:
BACKBONE:
DEPTH: "50x"
FEAT_DIM: 2048
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/bagtricks_R101-ibn"
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/dukemtmc/bot101ibn-kd-bot50ibn"

View File

@ -1,20 +0,0 @@
_BASE_: "../Base-bot-kd.yml"
MODEL_TEACHER:
BACKBONE:
DEPTH: "50x"
FEAT_DIM: 2048
MODEL:
BACKBONE:
DEPTH: "18x"
FEAT_DIM: 512
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/bagtricks_R50-ibn/model_final.pth"
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bot50ibn-kd-bot18ibn"

View File

@ -1,20 +0,0 @@
_BASE_: "../Base-sbs-kd.yml"
MODEL_TEACHER:
BACKBONE:
DEPTH: "101x"
FEAT_DIM: 2048
MODEL:
BACKBONE:
DEPTH: "34x"
FEAT_DIM: 512
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/sbs_R101-ibn/model_final.pth"
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs101ibn-kd-sbs18ibn"

View File

@ -1,20 +0,0 @@
_BASE_: "../Base-sbs-kd.yml"
MODEL_TEACHER:
BACKBONE:
DEPTH: "101x"
FEAT_DIM: 2048
MODEL:
BACKBONE:
DEPTH: "50x"
FEAT_DIM: 2048
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/sbs_R101-ibn/model_final.pth"
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs101ibn-kd-sbs50ibn"

View File

@ -1,20 +0,0 @@
_BASE_: "../Base-sbs-kd.yml"
MODEL_TEACHER:
BACKBONE:
DEPTH: "50x"
FEAT_DIM: 2048
MODEL:
BACKBONE:
DEPTH: "18x"
FEAT_DIM: 512
STUDENT_WEIGHTS: ""
TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/sbs_R50-ibn/model_final.pth"
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs50ibn-kd-sbs18ibn"

View File

@ -1,12 +0,0 @@
_BASE_: "../../../../configs/Base-bagtricks.yml"
MODEL:
BACKBONE:
DEPTH: "101x"
WITH_IBN: True
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bagtricks_R101-ibn"

View File

@ -1,13 +0,0 @@
_BASE_: "../../../../configs/Base-bagtricks.yml"
MODEL:
BACKBONE:
DEPTH: "18x"
WITH_IBN: True
FEAT_DIM: 512
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bagtricks_R18-ibn"

View File

@ -1,12 +0,0 @@
_BASE_: "../../../../configs/Base-bagtricks.yml"
MODEL:
BACKBONE:
DEPTH: "50x"
WITH_IBN: True
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bagtricks_R50-ibn"

View File

@ -1,13 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"
MODEL:
BACKBONE:
DEPTH: "101x"
WITH_IBN: True
FEAT_DIM: 2048
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs_R101-ibn"

View File

@ -1,13 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"
MODEL:
BACKBONE:
DEPTH: "18x"
WITH_IBN: True
FEAT_DIM: 512
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs_R18-ibn"

View File

@ -1,13 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"
MODEL:
BACKBONE:
DEPTH: "50x"
WITH_IBN: True
FEAT_DIM: 2048
DATASETS:
NAMES: ("DukeMTMC",)
TESTS: ("DukeMTMC",)
OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs_R50-ibn"

View File

@ -1,9 +0,0 @@
# encoding: utf-8
"""
@author: l1aoxingyu
@contact: sherlockliao01@gmail.com
"""
from .config import add_kdreid_config, add_shufflenet_config
from .kd_trainer import KDTrainer
from .modeling import build_shufflenetv2_backbone

View File

@ -1,105 +0,0 @@
# encoding: utf-8
"""
@author: l1aoxingyu, guan'an wang
@contact: sherlockliao01@gmail.com, guan.wang0706@gmail.com
"""
from fastreid.config import CfgNode as CN
def add_shufflenet_config(cfg):
_C = cfg
_C.MODEL.BACKBONE.MODEL_SIZE = '1.0x'
def add_kdreid_config(cfg):
_C = cfg
_C.MODEL_TEACHER = CN()
_C.MODEL_TEACHER.META_ARCHITECTURE = 'Baseline'
# ---------------------------------------------------------------------------- #
# teacher model Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL_TEACHER.BACKBONE = CN()
_C.MODEL_TEACHER.BACKBONE.NAME = "build_resnet_backbone"
_C.MODEL_TEACHER.BACKBONE.DEPTH = "50x"
_C.MODEL_TEACHER.BACKBONE.LAST_STRIDE = 1
# If use IBN block in backbone
_C.MODEL_TEACHER.BACKBONE.WITH_IBN = False
# If use SE block in backbone
_C.MODEL_TEACHER.BACKBONE.WITH_SE = False
# If use Non-local block in backbone
_C.MODEL_TEACHER.BACKBONE.WITH_NL = False
# Input feature dimension
_C.MODEL_TEACHER.BACKBONE.FEAT_DIM = 2048
# for shufflenet
_C.MODEL_TEACHER.BACKBONE.MODEL_SIZE = '1.0x'
#
_C.MODEL_TEACHER.BACKBONE.NORM = 'BN'
_C.MODEL_TEACHER.BACKBONE.PRETRAIN = False
# ---------------------------------------------------------------------------- #
# teacher model HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL_TEACHER.HEADS = CN()
_C.MODEL_TEACHER.HEADS.NAME = "EmbeddingHead"
# Pooling layer type
_C.MODEL_TEACHER.HEADS.POOL_LAYER = "avgpool"
_C.MODEL_TEACHER.HEADS.NECK_FEAT = "before"
_C.MODEL_TEACHER.HEADS.CLS_LAYER = "linear"
# Pretrained teacher and student model weights
_C.MODEL.TEACHER_WEIGHTS = ""
_C.MODEL.STUDENT_WEIGHTS = ""
#
_C.MODEL_TEACHER.HEADS.NORM = 'BN'
_C.MODEL_TEACHER.HEADS.SCALE = 64
_C.MODEL_TEACHER.HEADS.MARGIN = 0.35
def update_model_teacher_config(cfg):
cfg = cfg.clone()
frozen = cfg.is_frozen()
cfg.defrost()
cfg.MODEL.META_ARCHITECTURE = cfg.MODEL_TEACHER.META_ARCHITECTURE
# ---------------------------------------------------------------------------- #
# teacher model Backbone options
# ---------------------------------------------------------------------------- #
cfg.MODEL.BACKBONE.NAME = cfg.MODEL_TEACHER.BACKBONE.NAME
cfg.MODEL.BACKBONE.DEPTH = cfg.MODEL_TEACHER.BACKBONE.DEPTH
cfg.MODEL.BACKBONE.LAST_STRIDE = cfg.MODEL_TEACHER.BACKBONE.LAST_STRIDE
# If use IBN block in backbone
cfg.MODEL.BACKBONE.WITH_IBN = cfg.MODEL_TEACHER.BACKBONE.WITH_IBN
# If use SE block in backbone
cfg.MODEL.BACKBONE.WITH_SE = cfg.MODEL_TEACHER.BACKBONE.WITH_SE
# If use Non-local block in backbone
cfg.MODEL.BACKBONE.WITH_NL = cfg.MODEL_TEACHER.BACKBONE.WITH_NL
# Input feature dimension
cfg.MODEL.BACKBONE.FEAT_DIM = cfg.MODEL_TEACHER.BACKBONE.FEAT_DIM
cfg.MODEL.BACKBONE.PRETRAIN = False
# for shufflenet
cfg.MODEL.BACKBONE.MODEL_SIZE = cfg.MODEL_TEACHER.BACKBONE.MODEL_SIZE
# ---------------------------------------------------------------------------- #
# teacher model HEADS options
# ---------------------------------------------------------------------------- #
cfg.MODEL.HEADS.NAME = cfg.MODEL_TEACHER.HEADS.NAME
# Pooling layer type
cfg.MODEL.HEADS.POOL_LAYER = cfg.MODEL_TEACHER.HEADS.POOL_LAYER
cfg.MODEL.HEADS.SCALE = cfg.MODEL_TEACHER.HEADS.SCALE
cfg.MODEL.HEADS.MARGIN = cfg.MODEL_TEACHER.HEADS.MARGIN
if frozen: cfg.freeze()
return cfg
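A minimal sketch (not part of the diff; `get_cfg` and the concrete yml path are assumptions) of how these three helpers fit together in a DistillReID training script:

```python
from fastreid.config import get_cfg

cfg = get_cfg()
add_kdreid_config(cfg)       # register the MODEL_TEACHER.* keys before merging a KD yml
add_shufflenet_config(cfg)   # register MODEL.BACKBONE.MODEL_SIZE for shufflenet students

cfg.merge_from_file("projects/DistillReID/configs/DukeMTMC/KD-bot50ibn-bot18ibn.yml")
cfg.freeze()

# clone the student config and overwrite MODEL.* with the MODEL_TEACHER.* values,
# so the same build_model() call can construct the teacher network
teacher_cfg = update_model_teacher_config(cfg)
```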

View File

@ -1,139 +0,0 @@
# encoding: utf-8
"""
@author: l1aoxingyu
@contact: sherlockliao01@gmail.com
"""
import logging
import time
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.parallel import DistributedDataParallel
from fastreid.engine import DefaultTrainer
from fastreid.utils.file_io import PathManager
from fastreid.modeling.meta_arch import build_model
from fastreid.utils.checkpoint import Checkpointer
from .config import update_model_teacher_config
class KDTrainer(DefaultTrainer):
"""
A knowledge-distillation trainer for the person re-id task.
"""
def __init__(self, cfg):
"""
Args:
cfg (CfgNode):
"""
super().__init__(cfg)
model_t = self.build_model_teacher(self.cfg)
for param in model_t.parameters():
param.requires_grad = False
logger = logging.getLogger('fastreid.' + __name__)
# Load pre-trained teacher model
logger.info("Loading teacher model ...")
Checkpointer(model_t).load(cfg.MODEL.TEACHER_WEIGHTS)
if PathManager.exists(cfg.MODEL.STUDENT_WEIGHTS):
logger.info("Loading student model ...")
Checkpointer(self.model).load(cfg.MODEL.STUDENT_WEIGHTS)
else:
logger.info("No student model checkpoints")
self.model_t = model_t
def run_step(self):
"""
Implement the knowledge-distillation training logic described above.
"""
assert self.model.training, "[KDTrainer] base model was changed to eval mode!"
start = time.perf_counter()
"""
If you want to do something with the data, you can wrap the dataloader.
"""
data = next(self._data_loader_iter)
data_time = time.perf_counter() - start
outs = self.model(data)
# Compute reid loss
if isinstance(self.model, DistributedDataParallel):
loss_dict = self.model.module.losses(outs)
else:
loss_dict = self.model.losses(outs)
with torch.no_grad():
outs_t = self.model_t(data)
q_logits = outs["outputs"]["pred_class_logits"]
t_logits = outs_t["outputs"]["pred_class_logits"].detach()
loss_dict['loss_kl'] = self.distill_loss(q_logits, t_logits, t=16)
losses = sum(loss_dict.values())
with torch.cuda.stream(torch.cuda.Stream()):
metrics_dict = loss_dict
metrics_dict["data_time"] = data_time
self._write_metrics(metrics_dict)
self._detect_anomaly(losses, loss_dict)
"""
If you need to accumulate gradients or something similar, you can
wrap the optimizer with your custom `zero_grad()` method.
"""
self.optimizer.zero_grad()
losses.backward()
"""
If you need gradient clipping/scaling or other processing, you can
wrap the optimizer with your custom `step()` method.
"""
self.optimizer.step()
@classmethod
def build_model_teacher(cls, cfg) -> nn.Module:
cfg_t = update_model_teacher_config(cfg)
model_t = build_model(cfg_t)
return model_t
@staticmethod
def pkt_loss(output_net, target_net, eps=0.0000001):
# Normalize each vector by its norm
output_net_norm = torch.sqrt(torch.sum(output_net ** 2, dim=1, keepdim=True))
output_net = output_net / (output_net_norm + eps)
output_net[output_net != output_net] = 0
target_net_norm = torch.sqrt(torch.sum(target_net ** 2, dim=1, keepdim=True))
target_net = target_net / (target_net_norm + eps)
target_net[target_net != target_net] = 0
# Calculate the cosine similarity
model_similarity = torch.mm(output_net, output_net.transpose(0, 1))
target_similarity = torch.mm(target_net, target_net.transpose(0, 1))
# Scale cosine similarity to 0..1
model_similarity = (model_similarity + 1.0) / 2.0
target_similarity = (target_similarity + 1.0) / 2.0
# Transform them into probabilities
model_similarity = model_similarity / torch.sum(model_similarity, dim=1, keepdim=True)
target_similarity = target_similarity / torch.sum(target_similarity, dim=1, keepdim=True)
# Calculate the KL-divergence
loss = torch.mean(target_similarity * torch.log((target_similarity + eps) / (model_similarity + eps)))
return loss
@staticmethod
def distill_loss(y_s, y_t, t=4):
p_s = F.log_softmax(y_s / t, dim=1)
p_t = F.softmax(y_t / t, dim=1)
loss = F.kl_div(p_s, p_t, reduction='sum') * (t ** 2) / y_s.shape[0]
return loss
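`distill_loss` is the usual softened-softmax KL distillation loss. A quick self-contained sanity check (the function body is copied from above; the tensor shapes and temperature are arbitrary):

```python
import torch
import torch.nn.functional as F

def distill_loss(y_s, y_t, t=4):
    p_s = F.log_softmax(y_s / t, dim=1)
    p_t = F.softmax(y_t / t, dim=1)
    return F.kl_div(p_s, p_t, reduction='sum') * (t ** 2) / y_s.shape[0]

student_logits = torch.randn(8, 702)   # e.g. a batch of 8 over 702 train identities
teacher_logits = student_logits.clone()

# identical predictions -> KL divergence (and hence the loss) is ~0
assert distill_loss(student_logits, teacher_logits, t=16).abs().item() < 1e-3

# diverging predictions -> strictly positive loss
assert distill_loss(student_logits, torch.randn(8, 702), t=16).item() > 0
```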

View File

@ -1 +0,0 @@
from .backbones import build_shufflenetv2_backbone

View File

@ -1 +0,0 @@
from .shufflenetv2 import build_shufflenetv2_backbone

View File

@ -1,43 +0,0 @@
import torch
import torch.nn as nn
from collections import OrderedDict
from fastreid.modeling.backbones.build import BACKBONE_REGISTRY
from .network import ShuffleNetV2
__all__ = ['build_shufflenetv2_backbone']
@BACKBONE_REGISTRY.register()
def build_shufflenetv2_backbone(cfg):
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
model_size = cfg.MODEL.BACKBONE.MODEL_SIZE
return ShuffleNetV2Backbone(model_size=model_size, pretrained=pretrain, pretrain_path=pretrain_path)
class ShuffleNetV2Backbone(nn.Module):
def __init__(self, model_size, pretrained=False, pretrain_path=''):
super(ShuffleNetV2Backbone, self).__init__()
model = ShuffleNetV2(model_size=model_size)
if pretrained:
new_state_dict = OrderedDict()
state_dict = torch.load(pretrain_path)['state_dict']
for k, v in state_dict.items():
if k[:7] == 'module.':
k = k[7:]
new_state_dict[k] = v
model.load_state_dict(new_state_dict, strict=True)
self.backbone = nn.Sequential(
model.first_conv, model.maxpool, model.features, model.conv_last)
def forward(self, x):
return self.backbone(x)
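A minimal usage sketch (not part of the diff; it assumes the DistillReID project modules are importable and skips pretrained weights, so no checkpoint path is needed):

```python
import torch

# build the 1.0x variant without loading ImageNet weights
backbone = ShuffleNetV2Backbone(model_size='1.0x', pretrained=False)
backbone.eval()

with torch.no_grad():
    feat = backbone(torch.randn(2, 3, 256, 128))   # standard fastreid input size

# feat is a 4-D feature map; its channel count is what MODEL.BACKBONE.FEAT_DIM
# must be set to in the config
print(feat.shape)
```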

Some files were not shown because too many files have changed in this diff.