mirror of https://github.com/JDAI-CV/fast-reid.git
update fastreid V1.0
parent 15213dde4b
commit 15e1729a27
@@ -1,11 +1,11 @@
-_BASE_: "Base-bagtricks.yml"
+_BASE_: Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
     WITH_NL: True
 
   HEADS:
-    POOL_LAYER: "gempool"
+    POOL_LAYER: gempool
 
   LOSSES:
     NAME: ("CrossEntropyLoss", "TripletLoss")
@@ -1,25 +1,12 @@
-_BASE_: "Base-SBS.yml"
+_BASE_: Base-SBS.yml
 
 MODEL:
-  META_ARCHITECTURE: 'MGN'
+  META_ARCHITECTURE: MGN
 
-  FREEZE_LAYERS: ["backbone", "b1", "b2", "b3",]
+  FREEZE_LAYERS: [backbone, b1, b2, b3,]
 
   BACKBONE:
     WITH_NL: False
 
   HEADS:
     EMBEDDING_DIM: 256
-
-  LOSSES:
-    NAME: ("CrossEntropyLoss", "TripletLoss",)
-    CE:
-      EPSILON: 0.1
-      SCALE: 1.0
-
-    TRI:
-      MARGIN: 0.0
-      HARD_MINING: True
-      NORM_FEAT: False
-      SCALE: 1.0
-
@@ -1,15 +1,15 @@
-_BASE_: "Base-bagtricks.yml"
+_BASE_: Base-bagtricks.yml
 
 MODEL:
-  FREEZE_LAYERS: ["backbone"]
+  FREEZE_LAYERS: [ backbone ]
 
   BACKBONE:
     WITH_NL: True
 
   HEADS:
-    NECK_FEAT: "after"
-    POOL_LAYER: "gempoolP"
-    CLS_LAYER: "circleSoftmax"
+    NECK_FEAT: after
+    POOL_LAYER: gempoolP
+    CLS_LAYER: circleSoftmax
     SCALE: 64
     MARGIN: 0.35
 
@@ -26,8 +26,8 @@ MODEL:
       SCALE: 1.0
 
 INPUT:
-  SIZE_TRAIN: [384, 128]
-  SIZE_TEST: [384, 128]
+  SIZE_TRAIN: [ 384, 128 ]
+  SIZE_TEST: [ 384, 128 ]
 
   DO_AUTOAUG: True
   AUTOAUG_PROB: 0.1
@@ -36,7 +36,8 @@ DATALOADER:
   NUM_INSTANCE: 16
 
 SOLVER:
-  OPT: "Adam"
+  FP16_ENABLED: False
+  OPT: Adam
   MAX_EPOCH: 60
   BASE_LR: 0.00035
   BIAS_LR_FACTOR: 1.
@@ -44,19 +45,19 @@ SOLVER:
   WEIGHT_DECAY_BIAS: 0.0005
   IMS_PER_BATCH: 64
 
-  SCHED: "CosineAnnealingLR"
+  SCHED: CosineAnnealingLR
   DELAY_EPOCHS: 30
-  ETA_MIN_LR: 0.00000077
+  ETA_MIN_LR: 0.0000007
 
   WARMUP_FACTOR: 0.1
-  WARMUP_ITERS: 2000
+  WARMUP_EPOCHS: 10
 
-  FREEZE_ITERS: 2000
+  FREEZE_ITERS: 1000
 
   CHECKPOINT_PERIOD: 20
 
 TEST:
-  EVAL_PERIOD: 20
+  EVAL_PERIOD: 10
   IMS_PER_BATCH: 128
 
 CUDNN_BENCHMARK: True
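A minimal sketch (not part of the commit) of how the epoch-based solver keys above interact, assuming the hook logic shown later in this diff; the values are the ones from this config:

```python
# Hypothetical walk through the schedule phases implied by Base-SBS.yml.
MAX_EPOCH, WARMUP_EPOCHS, DELAY_EPOCHS = 60, 10, 30

for epoch in range(1, MAX_EPOCH + 1):
    if epoch <= WARMUP_EPOCHS:
        phase = "warmup: WarmupLR steps from WARMUP_FACTOR * BASE_LR"
    elif epoch >= DELAY_EPOCHS:
        phase = "cosine annealing toward ETA_MIN_LR"
    else:
        phase = "hold BASE_LR flat"
    if epoch in (1, WARMUP_EPOCHS + 1, DELAY_EPOCHS):
        print(epoch, phase)
```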
@@ -1,23 +1,22 @@
 MODEL:
-  META_ARCHITECTURE: "Baseline"
+  META_ARCHITECTURE: Baseline
 
   BACKBONE:
-    NAME: "build_resnet_backbone"
-    NORM: "BN"
-    DEPTH: "50x"
+    NAME: build_resnet_backbone
+    NORM: BN
+    DEPTH: 50x
     LAST_STRIDE: 1
     FEAT_DIM: 2048
     WITH_IBN: False
     PRETRAIN: True
-    PRETRAIN_PATH: "/export/home/lxy/.cache/torch/checkpoints/resnet50-19c8e357.pth"
 
   HEADS:
-    NAME: "EmbeddingHead"
-    NORM: "BN"
+    NAME: EmbeddingHead
+    NORM: BN
     WITH_BNNECK: True
-    POOL_LAYER: "avgpool"
-    NECK_FEAT: "before"
-    CLS_LAYER: "linear"
+    POOL_LAYER: avgpool
+    NECK_FEAT: before
+    CLS_LAYER: linear
 
   LOSSES:
     NAME: ("CrossEntropyLoss", "TripletLoss",)
@@ -33,8 +32,8 @@ MODEL:
       SCALE: 1.
 
 INPUT:
-  SIZE_TRAIN: [256, 128]
-  SIZE_TEST: [256, 128]
+  SIZE_TRAIN: [ 256, 128 ]
+  SIZE_TEST: [ 256, 128 ]
   REA:
     ENABLED: True
     PROB: 0.5
@@ -48,7 +47,7 @@ DATALOADER:
 
 SOLVER:
   FP16_ENABLED: True
-  OPT: "Adam"
+  OPT: Adam
   MAX_EPOCH: 120
   BASE_LR: 0.00035
   BIAS_LR_FACTOR: 2.
@@ -56,12 +55,12 @@ SOLVER:
   WEIGHT_DECAY_BIAS: 0.0005
   IMS_PER_BATCH: 64
 
-  SCHED: "MultiStepLR"
-  STEPS: [40, 90]
+  SCHED: MultiStepLR
+  STEPS: [ 40, 90 ]
   GAMMA: 0.1
 
   WARMUP_FACTOR: 0.1
-  WARMUP_ITERS: 2000
+  WARMUP_EPOCHS: 10
 
   CHECKPOINT_PERIOD: 30
 
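A minimal sketch (not part of the commit) of consuming these configs; `get_cfg`/`merge_from_file` are real fastreid entry points, the file path is illustrative:

```python
from fastreid.config import get_cfg

cfg = get_cfg()
# fastreid's CfgNode resolves the _BASE_ inheritance chain on merge
cfg.merge_from_file("configs/Market1501/bagtricks_R50.yml")
print(cfg.MODEL.HEADS.POOL_LAYER)  # inherited from Base-bagtricks.yml: avgpool
```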
@@ -1,12 +1,12 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/agw_R101-ibn"
+OUTPUT_DIR: logs/dukemtmc/agw_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/agw_R50-ibn"
+OUTPUT_DIR: logs/dukemtmc/agw_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/agw_R50"
+OUTPUT_DIR: logs/dukemtmc/agw_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/agw_S50"
+OUTPUT_DIR: logs/dukemtmc/agw_S50
@@ -1,12 +1,12 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/bagtricks_R101-ibn"
+OUTPUT_DIR: logs/dukemtmc/bagtricks_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/bagtricks_R50-ibn"
+OUTPUT_DIR: logs/dukemtmc/bagtricks_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/bagtricks_R50"
+OUTPUT_DIR: logs/dukemtmc/bagtricks_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/bagtricks_S50"
+OUTPUT_DIR: logs/dukemtmc/bagtricks_S50
@@ -1,4 +1,4 @@
-_BASE_: "../Base-MGN.yml"
+_BASE_: ../Base-MGN.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/mgn_R50-ibn"
+OUTPUT_DIR: logs/dukemtmc/mgn_R50-ibn
@@ -1,12 +1,12 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/sbs_R101-ibn"
+OUTPUT_DIR: logs/dukemtmc/sbs_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/sbs_R50-ibn"
+OUTPUT_DIR: logs/dukemtmc/sbs_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/sbs_R50"
+OUTPUT_DIR: logs/dukemtmc/sbs_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("DukeMTMC",)
   TESTS: ("DukeMTMC",)
 
-OUTPUT_DIR: "logs/dukemtmc/sbs_S50"
+OUTPUT_DIR: logs/dukemtmc/sbs_S50
@@ -1,12 +1,12 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/agw_R101-ibn"
+OUTPUT_DIR: logs/msmt17/agw_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/agw_R50-ibn"
+OUTPUT_DIR: logs/msmt17/agw_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/agw_R50"
+OUTPUT_DIR: logs/msmt17/agw_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/agw_S50"
+OUTPUT_DIR: logs/msmt17/agw_S50
@@ -1,13 +1,13 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/bagtricks_R101-ibn"
+OUTPUT_DIR: logs/msmt17/bagtricks_R101-ibn
 
@@ -1,4 +1,4 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
@@ -8,5 +8,5 @@ DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/bagtricks_R50-ibn"
+OUTPUT_DIR: logs/msmt17/bagtricks_R50-ibn
 
@@ -1,7 +1,7 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/bagtricks_R50"
+OUTPUT_DIR: logs/msmt17/bagtricks_R50
@@ -1,12 +1,12 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/bagtricks_S50"
+OUTPUT_DIR: logs/msmt17/bagtricks_S50
 
@@ -1,4 +1,4 @@
-_BASE_: "../Base-MGN.yml"
+_BASE_: ../Base-MGN.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/mgn_R50-ibn"
+OUTPUT_DIR: logs/msmt17/mgn_R50-ibn
@@ -1,12 +1,12 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/sbs_R101-ibn"
+OUTPUT_DIR: logs/msmt17/sbs_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/sbs_R50-ibn"
+OUTPUT_DIR: logs/msmt17/sbs_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/sbs_R50"
+OUTPUT_DIR: logs/msmt17/sbs_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("MSMT17",)
   TESTS: ("MSMT17",)
 
-OUTPUT_DIR: "logs/msmt17/sbs_S50"
+OUTPUT_DIR: logs/msmt17/sbs_S50
@@ -1,12 +1,12 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/agw_R101-ibn"
+OUTPUT_DIR: logs/market1501/agw_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/agw_R50-ibn"
+OUTPUT_DIR: logs/market1501/agw_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/agw_R50"
+OUTPUT_DIR: logs/market1501/agw_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-AGW.yml"
+_BASE_: ../Base-AGW.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/agw_S50"
+OUTPUT_DIR: logs/market1501/agw_S50
@@ -1,12 +1,12 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/bagtricks_R101-ibn"
+OUTPUT_DIR: logs/market1501/bagtricks_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/bagtricks_R50-ibn"
+OUTPUT_DIR: logs/market1501/bagtricks_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/bagtricks_R50"
+OUTPUT_DIR: logs/market1501/bagtricks_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/bagtricks_S50"
+OUTPUT_DIR: logs/market1501/bagtricks_S50
@@ -1,4 +1,4 @@
-_BASE_: "../Base-MGN.yml"
+_BASE_: ../Base-MGN.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/mgn_R50-ibn"
+OUTPUT_DIR: logs/market1501/mgn_R50-ibn
@@ -1,12 +1,12 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
-    DEPTH: "101x"
+    DEPTH: 101x
     WITH_IBN: True
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/sbs_R101-ibn"
+OUTPUT_DIR: logs/market1501/sbs_R101-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
@@ -8,4 +8,4 @@ DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/sbs_R50-ibn"
+OUTPUT_DIR: logs/market1501/sbs_R50-ibn
@@ -1,7 +1,7 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/sbs_R50"
+OUTPUT_DIR: logs/market1501/sbs_R50
@@ -1,11 +1,11 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 MODEL:
   BACKBONE:
-    NAME: "build_resnest_backbone"
+    NAME: build_resnest_backbone
 
 DATASETS:
   NAMES: ("Market1501",)
   TESTS: ("Market1501",)
 
-OUTPUT_DIR: "logs/market1501/sbs_S50"
+OUTPUT_DIR: logs/market1501/sbs_S50
@@ -1,4 +1,4 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 INPUT:
   SIZE_TRAIN: [256, 256]
@@ -22,7 +22,7 @@ SOLVER:
   IMS_PER_BATCH: 128
   MAX_ITER: 60
   STEPS: [30, 50]
-  WARMUP_ITERS: 10
+  WARMUP_EPOCHS: 10
 
   CHECKPOINT_PERIOD: 20
 
@@ -30,4 +30,4 @@ TEST:
   EVAL_PERIOD: 20
   IMS_PER_BATCH: 128
 
-OUTPUT_DIR: "logs/veriwild/bagtricks_R50-ibn_4gpu"
+OUTPUT_DIR: logs/veriwild/bagtricks_R50-ibn_4gpu
@@ -1,4 +1,4 @@
-_BASE_: "../Base-SBS.yml"
+_BASE_: ../Base-SBS.yml
 
 INPUT:
   SIZE_TRAIN: [256, 256]
@@ -9,14 +9,14 @@ MODEL:
     WITH_IBN: True
 
 SOLVER:
-  OPT: "SGD"
+  OPT: SGD
   BASE_LR: 0.01
   ETA_MIN_LR: 7.7e-5
 
   IMS_PER_BATCH: 64
   MAX_ITER: 60
   DELAY_ITERS: 30
-  WARMUP_ITERS: 10
+  WARMUP_EPOCHS: 10
   FREEZE_ITERS: 10
 
   CHECKPOINT_PERIOD: 20
@@ -29,4 +29,4 @@ TEST:
   EVAL_PERIOD: 20
   IMS_PER_BATCH: 128
 
-OUTPUT_DIR: "logs/veri/sbs_R50-ibn"
+OUTPUT_DIR: logs/veri/sbs_R50-ibn
@@ -1,4 +1,4 @@
-_BASE_: "../Base-bagtricks.yml"
+_BASE_: ../Base-bagtricks.yml
 
 INPUT:
   SIZE_TRAIN: [256, 256]
@@ -24,7 +24,7 @@ SOLVER:
   IMS_PER_BATCH: 512
   MAX_ITER: 60
   STEPS: [30, 50]
-  WARMUP_ITERS: 10
+  WARMUP_EPOCHS: 10
 
   CHECKPOINT_PERIOD: 20
 
@@ -32,4 +32,4 @@ TEST:
   EVAL_PERIOD: 20
   IMS_PER_BATCH: 128
 
-OUTPUT_DIR: "logs/vehicleid/bagtricks_R50-ibn_4gpu"
+OUTPUT_DIR: logs/vehicleid/bagtricks_R50-ibn_4gpu
@@ -25,6 +25,9 @@ _C.MODEL.META_ARCHITECTURE = "Baseline"
 
 _C.MODEL.FREEZE_LAYERS = ['']
 
+# MoCo memory size
+_C.MODEL.QUEUE_SIZE = 8192
+
 # ---------------------------------------------------------------------------- #
 # Backbone options
 # ---------------------------------------------------------------------------- #
@@ -120,6 +123,13 @@ _C.MODEL.PIXEL_MEAN = [0.485*255, 0.456*255, 0.406*255]
 # Values to be used for image normalization
 _C.MODEL.PIXEL_STD = [0.229*255, 0.224*255, 0.225*255]
 
+# -----------------------------------------------------------------------------
+# KNOWLEDGE DISTILLATION
+# -----------------------------------------------------------------------------
+
+_C.KD = CN()
+_C.KD.MODEL_CONFIG = ""
+_C.KD.MODEL_WEIGHTS = ""
 
 # -----------------------------------------------------------------------------
 # INPUT
@@ -148,6 +158,9 @@ _C.INPUT.CJ.CONTRAST = 0.15
 _C.INPUT.CJ.SATURATION = 0.1
 _C.INPUT.CJ.HUE = 0.1
 
+# Random Affine
+_C.INPUT.DO_AFFINE = False
+
 # Auto augmentation
 _C.INPUT.DO_AUTOAUG = False
 _C.INPUT.AUTOAUG_PROB = 0.0
@@ -160,7 +173,7 @@ _C.INPUT.AUGMIX_PROB = 0.0
 _C.INPUT.REA = CN()
 _C.INPUT.REA.ENABLED = False
 _C.INPUT.REA.PROB = 0.5
-_C.INPUT.REA.VALUE = [0.596*255, 0.558*255, 0.497*255]
+_C.INPUT.REA.VALUE = [0.485*255, 0.456*255, 0.406*255]
 # Random Patch
 _C.INPUT.RPT = CN()
 _C.INPUT.RPT.ENABLED = False
@@ -207,6 +220,7 @@ _C.SOLVER.BIAS_LR_FACTOR = 1.
 _C.SOLVER.HEADS_LR_FACTOR = 1.
 
 _C.SOLVER.MOMENTUM = 0.9
+_C.SOLVER.NESTEROV = True
 
 _C.SOLVER.WEIGHT_DECAY = 0.0005
 _C.SOLVER.WEIGHT_DECAY_BIAS = 0.
@@ -224,7 +238,7 @@ _C.SOLVER.ETA_MIN_LR = 1e-7
 
 # Warmup options
 _C.SOLVER.WARMUP_FACTOR = 0.1
-_C.SOLVER.WARMUP_ITERS = 10
+_C.SOLVER.WARMUP_EPOCHS = 10
 _C.SOLVER.WARMUP_METHOD = "linear"
 
 # Backbone freeze iters
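A minimal sketch (not part of the commit) reading back the config keys this hunk adds; values match the defaults above:

```python
from fastreid.config import get_cfg

cfg = get_cfg()
assert cfg.MODEL.QUEUE_SIZE == 8192   # MoCo memory size
assert cfg.KD.MODEL_CONFIG == ""      # teacher config path, empty by default
assert cfg.SOLVER.NESTEROV is True    # consumed by the SGD branch in solver/build
```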
@@ -59,7 +59,7 @@ def build_reid_train_loader(cfg, mapper=None, **kwargs):
     return train_loader
 
 
-def build_reid_test_loader(cfg, dataset_name, **kwargs):
+def build_reid_test_loader(cfg, dataset_name, mapper=None, **kwargs):
     cfg = cfg.clone()
 
     dataset = DATASET_REGISTRY.get(dataset_name)(root=_root, **kwargs)
@@ -67,8 +67,12 @@ def build_reid_test_loader(cfg, dataset_name, **kwargs):
         dataset.show_test()
     test_items = dataset.query + dataset.gallery
 
-    test_transforms = build_transforms(cfg, is_train=False)
-    test_set = CommDataset(test_items, test_transforms, relabel=False)
+    if mapper is not None:
+        transforms = mapper
+    else:
+        transforms = build_transforms(cfg, is_train=False)
+
+    test_set = CommDataset(test_items, transforms, relabel=False)
 
     mini_batch_size = cfg.TEST.IMS_PER_BATCH // comm.get_world_size()
     data_sampler = samplers.InferenceSampler(len(test_set))
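A minimal sketch (not part of the commit) of the new `mapper` hook: callers can now swap in their own test-time transform instead of the config-driven one. The dataset name, transform, and the `cfg` object are illustrative:

```python
import torchvision.transforms as T
from fastreid.data import build_reid_test_loader

# assumes `cfg` was built via get_cfg()/merge_from_file as shown earlier
mapper = T.Compose([T.Resize((256, 128)), T.ToTensor()])
test_loader, num_query = build_reid_test_loader(cfg, "Market1501", mapper=mapper)
```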
@@ -4,7 +4,6 @@
 @contact: sherlockliao01@gmail.com
 """
 
-
-from .autoaugment import *
 from .build import build_transforms
 from .transforms import *
+from .autoaugment import *
@@ -41,6 +41,9 @@ def build_transforms(cfg, is_train=True):
         cj_saturation = cfg.INPUT.CJ.SATURATION
         cj_hue = cfg.INPUT.CJ.HUE
 
+        # random affine
+        do_affine = cfg.INPUT.DO_AFFINE
+
         # random erasing
         do_rea = cfg.INPUT.REA.ENABLED
         rea_prob = cfg.INPUT.REA.PROB
@@ -60,9 +63,11 @@ def build_transforms(cfg, is_train=True):
             res.extend([T.Pad(padding, padding_mode=padding_mode), T.RandomCrop(size_train)])
         if do_cj:
             res.append(T.RandomApply([T.ColorJitter(cj_brightness, cj_contrast, cj_saturation, cj_hue)], p=cj_prob))
+        if do_affine:
+            res.append(T.RandomAffine(degrees=0, translate=None, scale=[0.9, 1.1], shear=None, resample=False,
+                                      fillcolor=128))
         if do_augmix:
-            res.append(T.RandomApply([AugMix()], p=augmix_prob))
-
+            res.append(AugMix(prob=augmix_prob))
         res.append(ToTensor())
         if do_rea:
             res.append(T.RandomErasing(p=rea_prob, value=rea_value))
@@ -114,38 +114,38 @@ def solarize(pil_img, level, *args):
     return ImageOps.solarize(pil_img, 256 - level)
 
 
-def shear_x(pil_img, level, image_size):
+def shear_x(pil_img, level):
     level = float_parameter(sample_level(level), 0.3)
     if np.random.uniform() > 0.5:
         level = -level
-    return pil_img.transform(image_size,
+    return pil_img.transform(pil_img.size,
                              Image.AFFINE, (1, level, 0, 0, 1, 0),
                              resample=Image.BILINEAR)
 
 
-def shear_y(pil_img, level, image_size):
+def shear_y(pil_img, level):
     level = float_parameter(sample_level(level), 0.3)
     if np.random.uniform() > 0.5:
         level = -level
-    return pil_img.transform(image_size,
+    return pil_img.transform(pil_img.size,
                              Image.AFFINE, (1, 0, 0, level, 1, 0),
                              resample=Image.BILINEAR)
 
 
-def translate_x(pil_img, level, image_size):
-    level = int_parameter(sample_level(level), image_size[0] / 3)
+def translate_x(pil_img, level):
+    level = int_parameter(sample_level(level), pil_img.size[0] / 3)
     if np.random.random() > 0.5:
         level = -level
-    return pil_img.transform(image_size,
+    return pil_img.transform(pil_img.size,
                              Image.AFFINE, (1, 0, level, 0, 1, 0),
                              resample=Image.BILINEAR)
 
 
-def translate_y(pil_img, level, image_size):
-    level = int_parameter(sample_level(level), image_size[1] / 3)
+def translate_y(pil_img, level):
+    level = int_parameter(sample_level(level), pil_img.size[1] / 3)
     if np.random.random() > 0.5:
         level = -level
-    return pil_img.transform(image_size,
+    return pil_img.transform(pil_img.size,
                              Image.AFFINE, (1, 0, 0, 0, 1, level),
                              resample=Image.BILINEAR)
@@ -174,17 +174,7 @@ def sharpness(pil_img, level, *args):
     return ImageEnhance.Sharpness(pil_img).enhance(level)
 
 
-augmentations_reid = [
-    autocontrast, equalize, posterize, shear_x, shear_y,
-    color, contrast, brightness, sharpness
-]
-
 augmentations = [
     autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
     translate_x, translate_y
 ]
-
-augmentations_all = [
-    autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
-    translate_x, translate_y, color, contrast, brightness, sharpness
-]
@@ -13,7 +13,7 @@ from collections import deque
 import numpy as np
 from PIL import Image
 
-from .functional import to_tensor, augmentations_reid
+from .functional import to_tensor, augmentations
 
 
 class ToTensor(object):
@@ -122,38 +122,45 @@ class RandomPatch(object):
 class AugMix(object):
     """ Perform AugMix augmentation and compute mixture.
     Args:
+        prob: Probability of taking augmix
         aug_prob_coeff: Probability distribution coefficients.
         mixture_width: Number of augmentation chains to mix per augmented example.
         mixture_depth: Depth of augmentation chains. -1 denotes stochastic depth in [1, 3]'
-        severity: Severity of underlying augmentation operators (between 1 to 10).
+        aug_severity: Severity of underlying augmentation operators (between 1 to 10).
     """
 
-    def __init__(self, aug_prob_coeff=1, mixture_width=3, mixture_depth=-1, severity=1):
+    def __init__(self, prob=0.5, aug_prob_coeff=0.1, mixture_width=3, mixture_depth=1, aug_severity=1):
+        self.prob = prob
         self.aug_prob_coeff = aug_prob_coeff
         self.mixture_width = mixture_width
        self.mixture_depth = mixture_depth
-        self.severity = severity
-        self.aug_list = augmentations_reid
+        self.aug_severity = aug_severity
+        self.augmentations = augmentations
 
     def __call__(self, image):
         """Perform AugMix augmentations and compute mixture.
         Returns:
           mixed: Augmented and mixed image.
         """
+        if random.random() > self.prob:
+            return np.asarray(image)
+
         ws = np.float32(
             np.random.dirichlet([self.aug_prob_coeff] * self.mixture_width))
         m = np.float32(np.random.beta(self.aug_prob_coeff, self.aug_prob_coeff))
 
-        image = np.asarray(image, dtype=np.float32).copy()
-        mix = np.zeros_like(image)
-        h, w = image.shape[0], image.shape[1]
+        # image = np.asarray(image, dtype=np.float32).copy()
+        # mix = np.zeros_like(image)
+        mix = np.zeros([image.size[1], image.size[0], 3])
+        # h, w = image.shape[0], image.shape[1]
         for i in range(self.mixture_width):
-            image_aug = Image.fromarray(image.copy().astype(np.uint8))
+            image_aug = image.copy()
+            # image_aug = Image.fromarray(image.copy().astype(np.uint8))
            depth = self.mixture_depth if self.mixture_depth > 0 else np.random.randint(1, 4)
             for _ in range(depth):
-                op = np.random.choice(self.aug_list)
-                image_aug = op(image_aug, self.severity, (w, h))
-            mix += ws[i] * np.asarray(image_aug, dtype=np.float32)
+                op = np.random.choice(self.augmentations)
+                image_aug = op(image_aug, self.aug_severity)
+            mix += ws[i] * np.asarray(image_aug)
 
         mixed = (1 - m) * image + m * mix
-        return mixed
+        return mixed.astype(np.uint8)
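A minimal sketch (not part of the commit) exercising the rewritten AugMix; the import path mirrors this file's location and the image is synthetic. With the new `prob` gate the transform returns a uint8 numpy array on both branches:

```python
import numpy as np
from PIL import Image
from fastreid.data.transforms.transforms import AugMix

img = Image.fromarray(np.random.randint(0, 255, (256, 128, 3), dtype=np.uint8))
aug = AugMix(prob=1.0)       # always apply, just for the demo
out = aug(img)
print(out.shape, out.dtype)  # (256, 128, 3) uint8
```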
@@ -233,8 +233,7 @@ class DefaultTrainer(TrainerBase):
             model, data_loader, optimizer
         )
 
-        self.iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
-        self.scheduler = self.build_lr_scheduler(cfg, optimizer, self.iters_per_epoch)
+        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
 
         # Assume no other objects need to be checkpointed.
         # We can later make it checkpoint the stateful hooks
@@ -246,16 +245,13 @@ class DefaultTrainer(TrainerBase):
             **optimizer_ckpt,
             **self.scheduler,
         )
 
+        self.iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
+
         self.start_epoch = 0
 
-        # if cfg.SOLVER.SWA.ENABLED:
-        #     self.max_iter = cfg.SOLVER.MAX_ITER + cfg.SOLVER.SWA.ITER
-        # else:
-        #     self.max_iter = cfg.SOLVER.MAX_ITER
-
         self.max_epoch = cfg.SOLVER.MAX_EPOCH
         self.max_iter = self.max_epoch * self.iters_per_epoch
-        self.warmup_iters = cfg.SOLVER.WARMUP_ITERS
+        self.warmup_epochs = cfg.SOLVER.WARMUP_EPOCHS
         self.delay_epochs = cfg.SOLVER.DELAY_EPOCHS
         self.cfg = cfg
@@ -413,15 +409,11 @@
         return build_optimizer(cfg, model)
 
     @classmethod
-    def build_lr_scheduler(cls, cfg, optimizer, iters_per_epoch):
+    def build_lr_scheduler(cls, cfg, optimizer):
         """
         It now calls :func:`fastreid.solver.build_lr_scheduler`.
         Overwrite it if you'd like a different scheduler.
         """
-        cfg = cfg.clone()
-        cfg.defrost()
-        cfg.SOLVER.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH - max(
-            math.ceil(cfg.SOLVER.WARMUP_ITERS / iters_per_epoch), cfg.SOLVER.DELAY_EPOCHS)
         return build_lr_scheduler(cfg, optimizer)
 
     @classmethod
@@ -429,7 +421,7 @@ class DefaultTrainer(TrainerBase):
         """
         Returns:
             iterable
-        It now calls :func:`fastreid.data.build_detection_train_loader`.
+        It now calls :func:`fastreid.data.build_reid_train_loader`.
         Overwrite it if you'd like a different data loader.
         """
         logger = logging.getLogger(__name__)
@@ -441,7 +433,7 @@ class DefaultTrainer(TrainerBase):
         """
         Returns:
             iterable
-        It now calls :func:`fastreid.data.build_detection_test_loader`.
+        It now calls :func:`fastreid.data.build_reid_test_loader`.
         Overwrite it if you'd like a different data loader.
         """
         return build_reid_test_loader(cfg, dataset_name)
@@ -250,14 +250,11 @@ class LRScheduler(HookBase):
         lr = self._optimizer.param_groups[self._best_param_group_id]["lr"]
         self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False)
 
-        next_iter = self.trainer.iter + 1
-        if next_iter < self.trainer.warmup_iters:
-            self._scheduler["warmup_sched"].step()
-
     def after_epoch(self):
-        next_iter = self.trainer.iter
         next_epoch = self.trainer.epoch + 1
-        if next_iter >= self.trainer.warmup_iters and next_epoch >= self.trainer.delay_epochs:
+        if next_epoch <= self.trainer.warmup_epochs:
+            self._scheduler["warmup_sched"].step()
+        elif next_epoch >= self.trainer.delay_epochs:
             self._scheduler["lr_sched"].step()
 
 
@@ -459,7 +456,6 @@ class LayerFreeze(HookBase):
         self.fc_freeze_iters = fc_freeze_iters
 
         self.is_frozen = False
 
-        self.fc_frozen = False
 
     def before_step(self):
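A minimal sketch (not part of the commit) of the scheduler hand-off the hook now implements: the warmup scheduler steps once per epoch up to `warmup_epochs`, and the main schedule only starts stepping at `delay_epochs`; in between, the learning rate stays flat:

```python
def pick_scheduler(next_epoch, warmup_epochs=10, delay_epochs=30):
    # mirrors LRScheduler.after_epoch above
    if next_epoch <= warmup_epochs:
        return "warmup_sched"
    elif next_epoch >= delay_epochs:
        return "lr_sched"
    return None  # hold BASE_LR between warmup and delay

assert pick_scheduler(1) == "warmup_sched"
assert pick_scheduler(20) is None
assert pick_scheduler(30) == "lr_sched"
```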
@@ -236,14 +236,7 @@ class SimpleTrainer(TrainerBase):
         If your want to do something with the heads, you can wrap the model.
         """
 
-        outs = self.model(data)
-
-        # Compute loss
-        if isinstance(self.model, DistributedDataParallel):
-            loss_dict = self.model.module.losses(outs)
-        else:
-            loss_dict = self.model.losses(outs)
-
+        loss_dict = self.model(data)
         losses = sum(loss_dict.values())
 
         """
@@ -251,6 +244,7 @@ class SimpleTrainer(TrainerBase):
         wrap the optimizer with your custom `zero_grad()` method.
         """
         self.optimizer.zero_grad()
+
         losses.backward()
 
         self._write_metrics(loss_dict, data_time)
@@ -308,6 +302,7 @@ class AMPTrainer(SimpleTrainer):
     Like :class:`SimpleTrainer`, but uses apex automatic mixed precision
     in the training loop.
     """
+
     def run_step(self):
         """
         Implement the AMP training logic.
@@ -319,14 +314,7 @@ class AMPTrainer(SimpleTrainer):
         data = next(self._data_loader_iter)
         data_time = time.perf_counter() - start
 
-        outs = self.model(data)
-
-        # Compute loss
-        if isinstance(self.model, DistributedDataParallel):
-            loss_dict = self.model.module.losses(outs)
-        else:
-            loss_dict = self.model.losses(outs)
-
+        loss_dict = self.model(data)
         losses = sum(loss_dict.values())
 
         self.optimizer.zero_grad()
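A minimal sketch (not part of the commit) of the new trainer contract: in training mode the model's `forward` now returns the loss dict directly, so the loop no longer special-cases `DistributedDataParallel` or calls `.losses()` itself:

```python
import torch

# what self.model(data) now yields in training mode (names illustrative)
loss_dict = {"loss_cls": torch.tensor(1.2), "loss_triplet": torch.tensor(0.4)}
losses = sum(loss_dict.values())  # the scalar run_step() backpropagates
print(losses)                     # tensor(1.6000)
```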
@@ -6,19 +6,18 @@
 import copy
 import logging
 from collections import OrderedDict
-from sklearn import metrics
 
 import numpy as np
 import torch
 import torch.nn.functional as F
+from sklearn import metrics
 
+from fastreid.utils import comm
+from fastreid.utils.compute_dist import build_dist
 from .evaluator import DatasetEvaluator
 from .query_expansion import aqe
 from .rank import evaluate_rank
 from .rerank import re_ranking
 from .roc import evaluate_roc
-from fastreid.utils import comm
-from fastreid.utils.compute_dist import build_dist
 
 logger = logging.getLogger(__name__)
@@ -103,10 +102,10 @@ class ReidEvaluator(DatasetEvaluator):
         mAP = np.mean(all_AP)
         mINP = np.mean(all_INP)
         for r in [1, 5, 10]:
-            self._results['Rank-{}'.format(r)] = cmc[r - 1]
-        self._results['mAP'] = mAP
-        self._results['mINP'] = mINP
-        self._results["metric"] = (mAP + cmc[0]) / 2
+            self._results['Rank-{}'.format(r)] = cmc[r - 1] * 100
+        self._results['mAP'] = mAP * 100
+        self._results['mINP'] = mINP * 100
+        self._results["metric"] = (mAP + cmc[0]) / 2 * 100
 
         if self.cfg.TEST.ROC_ENABLED:
             scores, labels = evaluate_roc(dist, query_pids, gallery_pids, query_camids, gallery_camids)
@@ -30,7 +30,7 @@ def print_csv_format(results):
     table = tabulate(
         csv_results,
         tablefmt="pipe",
-        floatfmt=".2%",
+        floatfmt=".2f",
         headers=metrics,
         numalign="left",
     )
@@ -20,6 +20,8 @@ class ArcSoftmax(nn.Module):
         self.s = cfg.MODEL.HEADS.SCALE
         self.m = cfg.MODEL.HEADS.MARGIN
 
+        self.easy_margin = False
+
         self.cos_m = math.cos(self.m)
         self.sin_m = math.sin(self.m)
         self.threshold = math.cos(math.pi - self.m)
@@ -30,26 +32,18 @@ class ArcSoftmax(nn.Module):
         self.register_buffer('t', torch.zeros(1))
 
     def forward(self, features, targets):
-        # get cos(theta)
-        cos_theta = F.linear(F.normalize(features), F.normalize(self.weight))
-        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
-
-        target_logit = cos_theta[torch.arange(0, features.size(0)), targets].view(-1, 1)
-
-        sin_theta = torch.sqrt(1.0 - torch.pow(target_logit, 2))
-        cos_theta_m = target_logit * self.cos_m - sin_theta * self.sin_m  # cos(target+margin)
-        mask = cos_theta > cos_theta_m
-        final_target_logit = torch.where(target_logit > self.threshold,
-                                         cos_theta_m.to(target_logit),
-                                         target_logit - self.mm)
-
-        hard_example = cos_theta[mask]
-        with torch.no_grad():
-            self.t = target_logit.mean() * 0.01 + (1 - 0.01) * self.t
-        cos_theta[mask] = hard_example * (self.t + hard_example).to(hard_example.dtype)
-        cos_theta.scatter_(1, targets.view(-1, 1).long(), final_target_logit)
-        pred_class_logits = cos_theta * self.s
-        return pred_class_logits
+        cosine = F.linear(F.normalize(features), F.normalize(self.weight))
+        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
+        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
+        if self.easy_margin:
+            phi = torch.where(cosine > 0, phi, cosine)
+        else:
+            phi = torch.where(cosine > self.threshold, phi, cosine - self.mm)
+        one_hot = torch.zeros(cosine.size(), device=cosine.device)
+        one_hot.scatter_(1, targets.view(-1, 1).long(), 1)
+        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
+        output *= self.s
+        return output
 
     def extra_repr(self):
         return 'in_features={}, num_classes={}, scale={}, margin={}'.format(
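A numeric check (not part of the commit) of the additive angular margin the rewrite uses: `phi = cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)`; the margin value is just illustrative:

```python
import math

m = 0.35                 # an illustrative MODEL.HEADS.MARGIN value
theta = math.radians(40.0)
cosine = math.cos(theta)
phi = cosine * math.cos(m) - math.sin(theta) * math.sin(m)
assert abs(phi - math.cos(theta + m)) < 1e-12
```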
@@ -80,7 +80,7 @@ class GhostBatchNorm(BatchNorm):
                                 self.weight, self.bias, False, self.momentum, self.eps)
 
 
-class FrozenBatchNorm(BatchNorm):
+class FrozenBatchNorm(nn.Module):
     """
     BatchNorm2d where the batch statistics and the affine parameters are fixed.
     It contains non-trainable buffers called
@@ -99,9 +99,13 @@ class FrozenBatchNorm(BatchNorm):
     _version = 3
 
     def __init__(self, num_features, eps=1e-5, **kwargs):
-        super().__init__(num_features, weight_freeze=True, bias_freeze=True, **kwargs)
+        super().__init__()
+        self.num_features = num_features
+        self.eps = eps
+        self.register_buffer("weight", torch.ones(num_features))
+        self.register_buffer("bias", torch.zeros(num_features))
+        self.register_buffer("running_mean", torch.zeros(num_features))
+        self.register_buffer("running_var", torch.ones(num_features) - eps)
 
     def forward(self, x):
         if x.requires_grad:
@@ -198,9 +202,9 @@ def get_norm(norm, out_channels, **kwargs):
         return None
     norm = {
         "BN": BatchNorm,
-        "syncBN": SyncBatchNorm,
        "GhostBN": GhostBatchNorm,
         "FrozenBN": FrozenBatchNorm,
         "GN": lambda channels, **args: nn.GroupNorm(32, channels),
+        "syncBN": SyncBatchNorm,
     }[norm]
     return norm(out_channels, **kwargs)
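A minimal sketch (not part of the commit): after the rewrite, FrozenBatchNorm is a plain `nn.Module` whose statistics live in buffers and never update, so its output is a fixed affine map regardless of train/eval mode. The import path mirrors this file's location:

```python
import torch
from fastreid.layers.batch_norm import FrozenBatchNorm

bn = FrozenBatchNorm(8)
x = torch.randn(2, 8, 4, 4)
bn.train()                   # training mode changes nothing: buffers are frozen
y1, y2 = bn(x), bn(x)
assert torch.equal(y1, y2)   # deterministic, no running-stat updates
```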
@@ -11,3 +11,4 @@ from .osnet import build_osnet_backbone
 from .resnest import build_resnest_backbone
 from .resnext import build_resnext_backbone
 from .regnet import build_regnet_backbone, build_effnet_backbone
+from .shufflenet import build_shufflenetv2_backbone
@@ -183,6 +183,7 @@ class ResNet(nn.Module):
         x = self.relu(x)
         x = self.maxpool(x)
 
+        # layer 1
         NL1_counter = 0
         if len(self.NL_1_idx) == 0:
             self.NL_1_idx = [-1]
@@ -192,7 +193,7 @@ class ResNet(nn.Module):
                 _, C, H, W = x.shape
                 x = self.NL_1[NL1_counter](x)
                 NL1_counter += 1
-        # Layer 2
+        # layer 2
         NL2_counter = 0
         if len(self.NL_2_idx) == 0:
             self.NL_2_idx = [-1]
@@ -202,7 +203,8 @@ class ResNet(nn.Module):
                 _, C, H, W = x.shape
                 x = self.NL_2[NL2_counter](x)
                 NL2_counter += 1
-        # Layer 3
+
+        # layer 3
         NL3_counter = 0
         if len(self.NL_3_idx) == 0:
             self.NL_3_idx = [-1]
@@ -212,7 +214,8 @@ class ResNet(nn.Module):
                 _, C, H, W = x.shape
                 x = self.NL_3[NL3_counter](x)
                 NL3_counter += 1
-        # Layer 4
+
+        # layer 4
         NL4_counter = 0
         if len(self.NL_4_idx) == 0:
             self.NL_4_idx = [-1]
@@ -0,0 +1,203 @@
+"""
+Author: Guan'an Wang
+Contact: guan.wang0706@gmail.com
+"""
+
+import torch
+from torch import nn
+from collections import OrderedDict
+import logging
+from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
+
+from fastreid.layers import get_norm
+from fastreid.modeling.backbones import BACKBONE_REGISTRY
+
+logger = logging.getLogger(__name__)
+
+
+class ShuffleV2Block(nn.Module):
+    """
+    Reference:
+        https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2
+    """
+
+    def __init__(self, bn_norm, inp, oup, mid_channels, *, ksize, stride):
+        super(ShuffleV2Block, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        self.mid_channels = mid_channels
+        self.ksize = ksize
+        pad = ksize // 2
+        self.pad = pad
+        self.inp = inp
+
+        outputs = oup - inp
+
+        branch_main = [
+            # pw
+            nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False),
+            get_norm(bn_norm, mid_channels),
+            nn.ReLU(inplace=True),
+            # dw
+            nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False),
+            get_norm(bn_norm, mid_channels),
+            # pw-linear
+            nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False),
+            get_norm(bn_norm, outputs),
+            nn.ReLU(inplace=True),
+        ]
+        self.branch_main = nn.Sequential(*branch_main)
+
+        if stride == 2:
+            branch_proj = [
+                # dw
+                nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False),
+                get_norm(bn_norm, inp),
+                # pw-linear
+                nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
+                get_norm(bn_norm, inp),
+                nn.ReLU(inplace=True),
+            ]
+            self.branch_proj = nn.Sequential(*branch_proj)
+        else:
+            self.branch_proj = None
+
+    def forward(self, old_x):
+        if self.stride == 1:
+            x_proj, x = self.channel_shuffle(old_x)
+            return torch.cat((x_proj, self.branch_main(x)), 1)
+        elif self.stride == 2:
+            x_proj = old_x
+            x = old_x
+            return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
+
+    def channel_shuffle(self, x):
+        batchsize, num_channels, height, width = x.data.size()
+        assert (num_channels % 4 == 0)
+        x = x.reshape(batchsize * num_channels // 2, 2, height * width)
+        x = x.permute(1, 0, 2)
+        x = x.reshape(2, -1, num_channels // 2, height, width)
+        return x[0], x[1]
+
+
+class ShuffleNetV2(nn.Module):
+    """
+    Reference:
+        https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2
+    """
+
+    def __init__(self, bn_norm, model_size='1.5x'):
+        super(ShuffleNetV2, self).__init__()
+
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if model_size == '0.5x':
+            self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif model_size == '1.0x':
+            self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif model_size == '1.5x':
+            self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif model_size == '2.0x':
+            self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]
+        else:
+            raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.Sequential(
+            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
+            get_norm(bn_norm, input_channel),
+            nn.ReLU(inplace=True),
+        )
+
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        self.features = []
+        for idxstage in range(len(self.stage_repeats)):
+            numrepeat = self.stage_repeats[idxstage]
+            output_channel = self.stage_out_channels[idxstage + 2]
+
+            for i in range(numrepeat):
+                if i == 0:
+                    self.features.append(ShuffleV2Block(bn_norm, input_channel, output_channel,
+                                                        mid_channels=output_channel // 2, ksize=3, stride=2))
+                else:
+                    self.features.append(ShuffleV2Block(bn_norm, input_channel // 2, output_channel,
+                                                        mid_channels=output_channel // 2, ksize=3, stride=1))
+
+                input_channel = output_channel
+
+        self.features = nn.Sequential(*self.features)
+
+        self.conv_last = nn.Sequential(
+            nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
+            get_norm(bn_norm, self.stage_out_channels[-1]),
+            nn.ReLU(inplace=True)
+        )
+
+        self._initialize_weights()
+
+    def forward(self, x):
+        x = self.first_conv(x)
+        x = self.maxpool(x)
+        x = self.features(x)
+        x = self.conv_last(x)
+
+        return x
+
+    def _initialize_weights(self):
+        for name, m in self.named_modules():
+            if isinstance(m, nn.Conv2d):
+                if 'first' in name:
+                    nn.init.normal_(m.weight, 0, 0.01)
+                else:
+                    nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0.0001)
+                nn.init.constant_(m.running_mean, 0)
+            elif isinstance(m, nn.BatchNorm1d):
+                nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0.0001)
+                nn.init.constant_(m.running_mean, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+
+@BACKBONE_REGISTRY.register()
+def build_shufflenetv2_backbone(cfg):
+    # fmt: off
+    pretrain = cfg.MODEL.BACKBONE.PRETRAIN
+    pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
+    bn_norm = cfg.MODEL.BACKBONE.NORM
+    model_size = cfg.MODEL.BACKBONE.DEPTH
+    # fmt: on
+
+    model = ShuffleNetV2(bn_norm, model_size=model_size)
+
+    if pretrain:
+        new_state_dict = OrderedDict()
+        state_dict = torch.load(pretrain_path)["state_dict"]
+        for k, v in state_dict.items():
+            if k[:7] == 'module.':
+                k = k[7:]
+            new_state_dict[k] = v
+
+        incompatible = model.load_state_dict(new_state_dict, strict=False)
+        if incompatible.missing_keys:
+            logger.info(
+                get_missing_parameters_message(incompatible.missing_keys)
+            )
+        if incompatible.unexpected_keys:
+            logger.info(
+                get_unexpected_parameters_message(incompatible.unexpected_keys)
+            )
+
+    return model
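A minimal sketch (not part of the commit) of selecting the new backbone from config; `build_backbone` and the registry are existing fastreid machinery, the config values are illustrative:

```python
from fastreid.config import get_cfg
from fastreid.modeling.backbones import build_backbone

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "build_shufflenetv2_backbone"
cfg.MODEL.BACKBONE.DEPTH = "1.5x"    # reused here as the ShuffleNetV2 model size
cfg.MODEL.BACKBONE.PRETRAIN = False  # skip loading a checkpoint for the demo
model = build_backbone(cfg)
```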
@@ -8,4 +8,3 @@ from .build import REID_HEADS_REGISTRY, build_heads
 
 # import all the meta_arch, so they will be registered
 from .embedding_head import EmbeddingHead
-from .attr_head import AttrHead
@@ -16,9 +16,9 @@ The call is expected to return an :class:`ROIHeads`.
 """
 
 
-def build_heads(cfg):
+def build_heads(cfg, **kwargs):
     """
     Build REIDHeads defined by `cfg.MODEL.REID_HEADS.NAME`.
     """
     head = cfg.MODEL.HEADS.NAME
-    return REID_HEADS_REGISTRY.get(head)(cfg)
+    return REID_HEADS_REGISTRY.get(head)(cfg, **kwargs)
@@ -50,7 +50,7 @@ class EmbeddingHead(nn.Module):
 
         self.bottleneck = nn.Sequential(*bottleneck)
 
-        # identity classification layer
+        # classification layer
         # fmt: off
         if cls_type == 'linear': self.classifier = nn.Linear(feat_dim, num_classes, bias=False)
         elif cls_type == 'arcSoftmax': self.classifier = ArcSoftmax(cfg, feat_dim, num_classes)
@@ -10,3 +10,5 @@ from .build import META_ARCH_REGISTRY, build_model
 # import all the meta_arch, so they will be registered
 from .baseline import Baseline
 from .mgn import MGN
+from .moco import MoCo
+from .distiller import Distiller
@@ -46,10 +46,8 @@ class Baseline(nn.Module):
             if targets.sum() < 0: targets.zero_()
 
             outputs = self.heads(features, targets)
-            return {
-                "outputs": outputs,
-                "targets": targets,
-            }
+            losses = self.losses(outputs, targets)
+            return losses
         else:
             outputs = self.heads(features)
             return outputs
@@ -68,15 +66,13 @@ class Baseline(nn.Module):
         images.sub_(self.pixel_mean).div_(self.pixel_std)
         return images
 
-    def losses(self, outs):
+    def losses(self, outputs, gt_labels):
         r"""
         Compute loss from modeling's outputs, the loss function input arguments
         must be the same as the outputs of the model forwarding.
         """
-        # fmt: off
-        outputs = outs["outputs"]
-        gt_labels = outs["targets"]
-
+        # model predictions
+        # fmt: off
         pred_class_logits = outputs['pred_class_logits'].detach()
         cls_outputs = outputs['cls_outputs']
         pred_features = outputs['features']
@@ -15,12 +15,12 @@ and expected to return a `nn.Module` object.
 """
 
 
-def build_model(cfg):
+def build_model(cfg, **kwargs):
     """
     Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``.
     Note that it does not load any weights from ``cfg``.
     """
     meta_arch = cfg.MODEL.META_ARCHITECTURE
-    model = META_ARCH_REGISTRY.get(meta_arch)(cfg)
+    model = META_ARCH_REGISTRY.get(meta_arch)(cfg, **kwargs)
     model.to(torch.device(cfg.MODEL.DEVICE))
     return model
@@ -0,0 +1,88 @@
+# encoding: utf-8
+"""
+@author: l1aoxingyu
+@contact: sherlockliao01@gmail.com
+"""
+
+import logging
+
+import torch
+import torch.nn.functional as F
+
+from fastreid.config import get_cfg
+from fastreid.modeling.meta_arch import META_ARCH_REGISTRY, build_model, Baseline
+from fastreid.utils.checkpoint import Checkpointer
+
+logger = logging.getLogger(__name__)
+
+
+@META_ARCH_REGISTRY.register()
+class Distiller(Baseline):
+    def __init__(self, cfg):
+        super(Distiller, self).__init__(cfg)
+
+        # Get teacher model config
+        cfg_t = get_cfg()
+        cfg_t.merge_from_file(cfg.KD.MODEL_CONFIG)
+
+        model_t = build_model(cfg_t)
+        logger.info("Teacher model:\n{}".format(model_t))
+
+        # No gradients for teacher model
+        for param in model_t.parameters():
+            param.requires_grad_(False)
+
+        logger.info("Loading teacher model weights ...")
+        Checkpointer(model_t).load(cfg.KD.MODEL_WEIGHTS)
+
+        # Not register teacher model as `nn.Module`, this is
+        # make sure teacher model weights not saved
+        self.model_t = [model_t.backbone, model_t.heads]
+
+    def forward(self, batched_inputs):
+        if self.training:
+            images = self.preprocess_image(batched_inputs)
+            # student model forward
+            s_feat = self.backbone(images)
+            assert "targets" in batched_inputs, "Labels are missing in training!"
+            targets = batched_inputs["targets"].to(self.device)
+
+            if targets.sum() < 0: targets.zero_()
+
+            s_outputs = self.heads(s_feat, targets)
+
+            # teacher model forward
+            with torch.no_grad():
+                t_feat = self.model_t[0](images)
+                t_outputs = self.model_t[1](t_feat, targets)
+
+            losses = self.losses(s_outputs, t_outputs, targets)
+            return losses
+
+        # Eval mode, just conventional reid feature extraction
+        else:
+            return super(Distiller, self).forward(batched_inputs)
+
+    def losses(self, s_outputs, t_outputs, gt_labels):
+        r"""
+        Compute loss from modeling's outputs, the loss function input arguments
+        must be the same as the outputs of the model forwarding.
+        """
+        loss_dict = super(Distiller, self).losses(s_outputs, gt_labels)
+
+        s_logits = s_outputs["pred_class_logits"]
+        t_logits = t_outputs["pred_class_logits"].detach()
+        loss_dict["loss_jsdiv"] = self.jsdiv_loss(s_logits, t_logits)
+
+        return loss_dict
+
+    @staticmethod
+    def _kldiv(y_s, y_t, t):
+        p_s = F.log_softmax(y_s / t, dim=1)
+        p_t = F.softmax(y_t / t, dim=1)
+        loss = F.kl_div(p_s, p_t, reduction="sum") * (t ** 2) / y_s.shape[0]
+        return loss
+
+    def jsdiv_loss(self, y_s, y_t, t=16):
+        loss = (self._kldiv(y_s, y_t, t) + self._kldiv(y_t, y_s, t)) / 2
+        return loss
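A quick sanity check (not part of the commit) of the symmetric temperature-scaled KL used as the distillation loss above, replicated standalone; the logit shapes are illustrative:

```python
import torch
import torch.nn.functional as F

def kldiv(y_s, y_t, t=16):
    # same formula as Distiller._kldiv
    p_s = F.log_softmax(y_s / t, dim=1)
    p_t = F.softmax(y_t / t, dim=1)
    return F.kl_div(p_s, p_t, reduction="sum") * (t ** 2) / y_s.shape[0]

s = torch.randn(4, 751)
# symmetric KL of identical student/teacher logits is ~0
assert (kldiv(s, s) + kldiv(s, s)) / 2 < 1e-4
```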
@@ -111,17 +111,11 @@ class MGN(nn.Module):
             b32_outputs = self.b32_head(b32_feat, targets)
             b33_outputs = self.b33_head(b33_feat, targets)
 
-            return {
-                "b1_outputs": b1_outputs,
-                "b2_outputs": b2_outputs,
-                "b21_outputs": b21_outputs,
-                "b22_outputs": b22_outputs,
-                "b3_outputs": b3_outputs,
-                "b31_outputs": b31_outputs,
-                "b32_outputs": b32_outputs,
-                "b33_outputs": b33_outputs,
-                "targets": targets,
-            }
+            losses = self.losses(b1_outputs,
+                                 b2_outputs, b21_outputs, b22_outputs,
+                                 b3_outputs, b31_outputs, b32_outputs, b33_outputs,
+                                 targets)
+            return losses
         else:
             b1_pool_feat = self.b1_head(b1_feat)
             b2_pool_feat = self.b2_head(b2_feat)
@@ -150,18 +144,12 @@ class MGN(nn.Module):
         images.sub_(self.pixel_mean).div_(self.pixel_std)
         return images
 
-    def losses(self, outs):
-        # fmt: off
-        b1_outputs = outs["b1_outputs"]
-        b2_outputs = outs["b2_outputs"]
-        b21_outputs = outs["b21_outputs"]
-        b22_outputs = outs["b22_outputs"]
-        b3_outputs = outs["b3_outputs"]
-        b31_outputs = outs["b31_outputs"]
-        b32_outputs = outs["b32_outputs"]
-        b33_outputs = outs["b33_outputs"]
-        gt_labels = outs["targets"]
-
+    def losses(self,
+               b1_outputs,
+               b2_outputs, b21_outputs, b22_outputs,
+               b3_outputs, b31_outputs, b32_outputs, b33_outputs, gt_labels):
+        # model predictions
+        # fmt: off
         pred_class_logits = b1_outputs['pred_class_logits'].detach()
         b1_logits = b1_outputs['cls_outputs']
         b2_logits = b2_outputs['cls_outputs']
@@ -0,0 +1,126 @@
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""

import torch
import torch.nn.functional as F
from torch import nn

from fastreid.modeling.losses.utils import concat_all_gather
from fastreid.utils import comm
from .baseline import Baseline
from .build import META_ARCH_REGISTRY


@META_ARCH_REGISTRY.register()
class MoCo(Baseline):
    def __init__(self, cfg):
        super(MoCo, self).__init__(cfg)

        dim = cfg.MODEL.HEADS.EMBEDDING_DIM if cfg.MODEL.HEADS.EMBEDDING_DIM \
            else cfg.MODEL.BACKBONE.FEAT_DIM
        size = cfg.MODEL.QUEUE_SIZE
        self.memory = Memory(dim, size)

    def losses(self, outputs, gt_labels):
        """
        Compute loss from the model's outputs; the loss function's input
        arguments must match the outputs of the model's forward pass.
        """
        # reid loss
        loss_dict = super(MoCo, self).losses(outputs, gt_labels)

        # memory loss
        pred_features = outputs['features']
        loss_mb = self.memory(pred_features, gt_labels)
        loss_dict["loss_mb"] = loss_mb
        return loss_dict


class Memory(nn.Module):
    """
    Build a MoCo memory with a queue
    https://arxiv.org/abs/1911.05722
    """

    def __init__(self, dim=512, K=65536):
        """
        dim: feature dimension (default: 512)
        K: queue size; number of negative keys (default: 65536)
        """
        super().__init__()
        self.K = K

        self.margin = 0.25
        self.gamma = 32

        # create the queue
        self.register_buffer("queue", torch.randn(dim, K))
        self.queue = F.normalize(self.queue, dim=0)

        self.register_buffer("queue_label", torch.zeros((1, K), dtype=torch.long))
        self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))

    @torch.no_grad()
    def _dequeue_and_enqueue(self, keys, targets):
        # gather keys/targets before updating the queue
        if comm.get_world_size() > 1:
            keys = concat_all_gather(keys)
            targets = concat_all_gather(targets)
        else:
            keys = keys.detach()
            targets = targets.detach()

        batch_size = keys.shape[0]

        ptr = int(self.queue_ptr)
        assert self.K % batch_size == 0  # for simplicity

        # replace the keys at ptr (dequeue and enqueue)
        self.queue[:, ptr:ptr + batch_size] = keys.T
        self.queue_label[:, ptr:ptr + batch_size] = targets
        ptr = (ptr + batch_size) % self.K  # move pointer

        self.queue_ptr[0] = ptr

    def forward(self, feat_q, targets):
        """
        Memory bank enqueue and compute metric loss
        Args:
            feat_q: model features
            targets: gt labels

        Returns:
            scalar memory-bank loss
        """
        # normalize embedding features
        feat_q = F.normalize(feat_q, p=2, dim=1)
        # dequeue and enqueue
        self._dequeue_and_enqueue(feat_q.detach(), targets)
        # compute loss
        loss = self._pairwise_cosface(feat_q, targets)
        return loss

    def _pairwise_cosface(self, feat_q, targets):
        dist_mat = torch.matmul(feat_q, self.queue)

        N, M = dist_mat.size()  # (bsz, memory)
        is_pos = targets.view(N, 1).expand(N, M).eq(self.queue_label.expand(N, M)).float()
        is_neg = targets.view(N, 1).expand(N, M).ne(self.queue_label.expand(N, M)).float()

        # Mask scores related to themselves
        same_indx = torch.eye(N, N, device=is_pos.device)
        other_indx = torch.zeros(N, M - N, device=is_pos.device)
        same_indx = torch.cat((same_indx, other_indx), dim=1)
        is_pos = is_pos - same_indx

        s_p = dist_mat * is_pos
        s_n = dist_mat * is_neg

        logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
        logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)

        loss = F.softplus(torch.logsumexp(logit_p, dim=1) + torch.logsumexp(logit_n, dim=1)).mean()

        return loss
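For readers skimming the diff, here is a minimal, hypothetical single-GPU sketch of exercising the `Memory` module above in isolation. The feature dimension, queue size, batch size, and label range are illustrative assumptions, not values from this commit:

```python
import torch

# Assumes the Memory class above is importable; sizes are illustrative.
# K must be divisible by the batch size (see the assert in _dequeue_and_enqueue).
mem = Memory(dim=128, K=512)
feat = torch.randn(64, 128)            # a batch of 64 embedding vectors
labels = torch.randint(0, 10, (64,))   # fake identity labels
loss = mem(feat, labels)               # enqueues the batch, returns the cosface loss
print(loss.item())
```

On a single GPU `comm.get_world_size()` is 1, so the `concat_all_gather` branch is skipped and the batch is enqueued directly.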
@@ -23,27 +23,25 @@ def build_optimizer(cfg, model):
        params += [{"name": key, "params": [value], "lr": lr, "weight_decay": weight_decay}]

    solver_opt = cfg.SOLVER.OPT
    # fmt: off
    if solver_opt == "SGD": opt_fns = getattr(optim, solver_opt)(params, momentum=cfg.SOLVER.MOMENTUM)
    else: opt_fns = getattr(optim, solver_opt)(params)
    # fmt: on
    if solver_opt == "SGD":
        opt_fns = getattr(optim, solver_opt)(
            params,
            momentum=cfg.SOLVER.MOMENTUM,
            nesterov=True if cfg.SOLVER.MOMENTUM and cfg.SOLVER.NESTEROV else False
        )
    else:
        opt_fns = getattr(optim, solver_opt)(params)
    return opt_fns


def build_lr_scheduler(cfg, optimizer):
    cfg = cfg.clone()
    cfg.defrost()
    cfg.SOLVER.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH - max(
        cfg.SOLVER.WARMUP_EPOCHS + 1, cfg.SOLVER.DELAY_EPOCHS)

    scheduler_dict = {}

    if cfg.SOLVER.WARMUP_ITERS > 0:
        warmup_args = {
            "optimizer": optimizer,

            # warmup options
            "warmup_factor": cfg.SOLVER.WARMUP_FACTOR,
            "warmup_iters": cfg.SOLVER.WARMUP_ITERS,
            "warmup_method": cfg.SOLVER.WARMUP_METHOD,
        }
        scheduler_dict["warmup_sched"] = lr_scheduler.WarmupLR(**warmup_args)

    scheduler_args = {
        "MultiStepLR": {
            "optimizer": optimizer,
@@ -63,4 +61,15 @@ def build_lr_scheduler(cfg, optimizer):
    scheduler_dict["lr_sched"] = getattr(lr_scheduler, cfg.SOLVER.SCHED)(
        **scheduler_args[cfg.SOLVER.SCHED])

    if cfg.SOLVER.WARMUP_EPOCHS > 0:
        warmup_args = {
            "optimizer": optimizer,

            # warmup options
            "warmup_factor": cfg.SOLVER.WARMUP_FACTOR,
            "warmup_epochs": cfg.SOLVER.WARMUP_EPOCHS,
            "warmup_method": cfg.SOLVER.WARMUP_METHOD,
        }
        scheduler_dict["warmup_sched"] = lr_scheduler.WarmupLR(**warmup_args)

    return scheduler_dict
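As a rough sketch of how these two builders might be wired together, under stated assumptions: `train_one_epoch` is a hypothetical placeholder, and the per-epoch stepping policy shown here is an illustration, not the actual fastreid trainer logic, which drives the schedulers itself:

```python
# Hypothetical usage, assuming a fastreid `cfg` and `model` are in scope.
optimizer = build_optimizer(cfg, model)
scheds = build_lr_scheduler(cfg, optimizer)

for epoch in range(cfg.SOLVER.MAX_EPOCH):
    train_one_epoch(model, optimizer)      # placeholder for the real inner loop
    if epoch < cfg.SOLVER.WARMUP_EPOCHS:
        scheds["warmup_sched"].step()      # ramp the LR toward its base value
    elif epoch >= cfg.SOLVER.DELAY_EPOCHS:
        scheds["lr_sched"].step()          # main schedule after the delay period
```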
@@ -8,26 +8,25 @@ from typing import List

import torch
from torch.optim.lr_scheduler import *
from torch.optim.lr_scheduler import _LRScheduler


class WarmupLR(_LRScheduler):
class WarmupLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(
            self,
            optimizer: torch.optim.Optimizer,
            warmup_factor: float = 0.1,
            warmup_iters: int = 10,
            warmup_epochs: int = 10,
            warmup_method: str = "linear",
            last_epoch: int = -1,
    ):
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_epochs = warmup_epochs
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        warmup_factor = _get_warmup_factor_at_epoch(
            self.warmup_method, self.last_epoch, self.warmup_epochs, self.warmup_factor
        )
        return [
            base_lr * warmup_factor for base_lr in self.base_lrs
@@ -38,30 +37,30 @@ class WarmupLR(_LRScheduler):
        return self.get_lr()


def _get_warmup_factor_at_iter(
        method: str, iter: int, warmup_iters: int, warmup_factor: float
def _get_warmup_factor_at_epoch(
        method: str, epoch: int, warmup_epochs: int, warmup_factor: float
) -> float:
    """
    Return the learning rate warmup factor at a specific epoch.
    See https://arxiv.org/abs/1706.02677 for more details.
    Args:
        method (str): warmup method; either "constant" or "linear".
        iter (int): iteration at which to calculate the warmup factor.
        warmup_iters (int): the number of warmup iterations.
        epoch (int): epoch at which to calculate the warmup factor.
        warmup_epochs (int): the number of warmup epochs.
        warmup_factor (float): the base warmup factor (the meaning changes according
            to the method used).
    Returns:
        float: the effective warmup factor at the given epoch.
    """
    if iter >= warmup_iters:
    if epoch >= warmup_epochs:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = (1 - iter / warmup_iters) * (1 - warmup_factor)
        return 1 - alpha
        alpha = epoch / warmup_epochs
        return warmup_factor * (1 - alpha) + alpha
    elif method == "exp":
        return warmup_factor ** (1 - iter / warmup_iters)
        return warmup_factor ** (1 - epoch / warmup_epochs)
    else:
        raise ValueError("Unknown warmup method: {}".format(method))
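A quick numeric check of the linear branch above (the warmup settings are illustrative): the factor ramps linearly from `warmup_factor` at epoch 0 to 1.0 once `warmup_epochs` is reached.

```python
# Illustrative values only; assumes _get_warmup_factor_at_epoch is importable.
for epoch in range(0, 12, 2):
    f = _get_warmup_factor_at_epoch("linear", epoch, warmup_epochs=10, warmup_factor=0.1)
    print(epoch, round(f, 2))
# 0 0.1 | 2 0.28 | 4 0.46 | 6 0.64 | 8 0.82 | 10 1.0
```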
@@ -1,3 +1,9 @@
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""

from .lamb import Lamb
from .swa import SWA
from torch.optim import *
@@ -1,116 +0,0 @@
import math

import torch
from torch.optim.optimizer import Optimizer


class Adam(Optimizer):
    r"""Implements Adam algorithm.
    It has been proposed in `Adam: A Method for Stochastic Optimization`_.
    The implementation of the L2 penalty follows changes proposed in
    `Decoupled Weight Decay Regularization`_.
    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)
    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _Decoupled Weight Decay Regularization:
        https://arxiv.org/abs/1711.05101
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay, amsgrad=amsgrad)
        super(Adam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(Adam, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            if group['freeze']: continue

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
                amsgrad = group['amsgrad']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                if group['weight_decay'] != 0:
                    grad = grad.add(p, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                else:
                    denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])

                step_size = group['lr'] / bias_correction1

                p.addcdiv_(exp_avg, denom, value=-step_size)

        return loss
@@ -68,7 +68,7 @@ class Lamb(Optimizer):

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None or group['freeze']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
@@ -1,104 +0,0 @@
import torch
from torch.optim.optimizer import Optimizer, required


class SGD(Optimizer):
    r"""Implements stochastic gradient descent (optionally with momentum).
    Nesterov momentum is based on the formula from
    `On the importance of initialization and momentum in deep learning`__.
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float): learning rate
        momentum (float, optional): momentum factor (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        dampening (float, optional): dampening for momentum (default: 0)
        nesterov (bool, optional): enables Nesterov momentum (default: False)
    Example:
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()
    __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
    .. note::
        The implementation of SGD with Momentum/Nesterov subtly differs from
        Sutskever et. al. and implementations in some other frameworks.
        Considering the specific case of Momentum, the update can be written as
        .. math::
            \begin{aligned}
                v_{t+1} & = \mu * v_{t} + g_{t+1}, \\
                p_{t+1} & = p_{t} - \text{lr} * v_{t+1},
            \end{aligned}
        where :math:`p`, :math:`g`, :math:`v` and :math:`\mu` denote the
        parameters, gradient, velocity, and momentum respectively.
        This is in contrast to Sutskever et. al. and
        other frameworks which employ an update of the form
        .. math::
            \begin{aligned}
                v_{t+1} & = \mu * v_{t} + \text{lr} * g_{t+1}, \\
                p_{t+1} & = p_{t} - v_{t+1}.
            \end{aligned}
        The Nesterov version is analogously modified.
    """

    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False):
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if weight_decay < 0.0:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))

        defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
                        weight_decay=weight_decay, nesterov=nesterov)
        if nesterov and (momentum <= 0 or dampening != 0):
            raise ValueError("Nesterov momentum requires a momentum and zero dampening")
        super(SGD, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(SGD, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('nesterov', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            if group['freeze']: continue

            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad
                if weight_decay != 0:
                    d_p = d_p.add(p, alpha=weight_decay)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                p.add_(d_p, alpha=-group['lr'])

        return loss
@@ -322,20 +322,21 @@ class PeriodicCheckpointer:
        additional_state = {"epoch": epoch}
        additional_state.update(kwargs)
        if (epoch + 1) % self.period == 0 and epoch < self.max_epoch - 1:
            self.checkpointer.save(
                "model_{:04d}".format(epoch), **additional_state
            )
            if additional_state["metric"] > self.best_metric:
                self.checkpointer.save(
                    "model_best", **additional_state
                )
                self.best_metric = additional_state["metric"]
            # Saved after the best-model checkpoint so the latest checkpoint stays valid
            self.checkpointer.save(
                "model_{:04d}".format(epoch), **additional_state
            )
        if epoch >= self.max_epoch - 1:
            self.checkpointer.save("model_final", **additional_state)
            if additional_state["metric"] > self.best_metric:
                self.checkpointer.save(
                    "model_best", **additional_state
                )
            self.checkpointer.save("model_final", **additional_state)

    def save(self, name: str, **kwargs: Any):
        """
@@ -4,7 +4,6 @@
@contact: sherlockliao01@gmail.com
"""

import math
from torch import nn

__all__ = [
@@ -25,7 +24,6 @@ def weights_init_kaiming(m):
            nn.init.constant_(m.bias, 0.0)
    elif classname.find('BatchNorm') != -1:
        if m.affine:
            # nn.init.normal_(m.weight, 1.0, 0.02)
            nn.init.constant_(m.weight, 1.0)
            nn.init.constant_(m.bias, 0.0)
@@ -1,47 +0,0 @@
# Model Distillation in FastReID

This project provides a training script for small models, enabling both fast inference and high accuracy.

## Datasets Preparation
- Market1501
- DukeMTMC-reID
- MSMT17

## Train and Evaluation
```shell script
# a demo on the DukeMTMC-reID dataset
# see ./configs for more examples
# train BagTricksIBN50 as the teacher model
python3 projects/DistillReID/train_net.py --config-file projects/DistillReID/configs/DukeMTMC/bot50ibn.yml
# train BagTricksIBN18 as the student model
python3 projects/DistillReID/train_net.py --config-file projects/DistillReID/configs/DukeMTMC/KD-bot50ibn-bot18ibn.yml --kd
```

## Experimental Results and Trained Models

### Settings

All experiments are conducted on a single machine with:
- CPU: Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz
- GPU: Tesla P40 (Memory 22919MB)

### DukeMTMC-reID

<table><thead><tr><th colspan="2" rowspan="2">Rank-1 (mAP) / <br>Q.Time/batch(128)</th><th colspan="4">Student (BagTricks)</th></tr><tr><td>IBN-101</td><td>IBN-50</td><td>IBN-34</td><td>IBN-18</td></tr></thead><tbody><tr><td rowspan="4">Teacher<br>(BagTricks)</td><td>IBN-101</td><td>90.8(80.8)/0.3395s</td><td>90.8(81.1)/0.1984s</td><td>89.63(78.9)/0.1760s</td><td>86.96(75.75)/0.0854s</td></tr><tr><td>IBN-50</td><td>-</td><td>89.8(79.8)/0.2264s</td><td>88.82(78.9)/0.1761s</td><td>87.75(76.18)/0.0838s</td></tr><tr><td>IBN-34</td><td>-</td><td>-</td><td>88.64(76.4)/0.1766s</td><td>87.43(75.66)/0.0845s</td></tr><tr><td>IBN-18</td><td>-</td><td>-</td><td>-</td><td>85.50(71.60)/0.9178s</td></tr></tbody></table>

### Market-1501

<table><thead><tr><th colspan="2" rowspan="2">Rank-1 (mAP) / <br>Q.Time/batch(128)</th><th colspan="4">Student (BagTricks)</th></tr><tr><td>IBN-101</td><td>IBN-50</td><td>IBN-34</td><td>IBN-18</td></tr></thead><tbody><tr><td rowspan="4">Teacher<br>(BagTricks)</td><td>IBN-101</td><td>95.43(88.95)/0.2698s</td><td>95.19(89.52)/0.1791s</td><td>94.51(87.82)/0.0869s</td><td>93.85(85.77)/0.0612s</td></tr><tr><td>IBN-50</td><td>-</td><td>95.25(88.16)/0.1823s</td><td>95.13(87.28)/0.0863s</td><td>94.18(85.81)/0.0614s</td></tr><tr><td>IBN-34</td><td></td><td>-</td><td>94.63(84.91)/0.0860s</td><td>93.71(85.20)/0.0620s</td></tr><tr><td>IBN-18</td><td>-</td><td>-</td><td>-</td><td>92.87(81.22)/0.0615s</td></tr><tr><td colspan="2">Average Q.Time</td><td>0.2698s</td><td>0.1807s</td><td>0.0864s</td><td>0.0616s</td></tr></tbody></table>

### MSMT17

<table><thead><tr><th colspan="2" rowspan="2">Rank-1 (mAP) / <br>Q.Time/batch(128)</th><th colspan="4">Student (BagTricks)</th></tr><tr><td>IBN-101</td><td>IBN-50</td><td>IBN-34</td><td>IBN-18</td></tr></thead><tbody><tr><td rowspan="4">Teacher<br>(BagTricks)</td><td>IBN-101</td><td>81.95(60.51)/0.2693s</td><td>82.37(62.08)/0.1792s</td><td>81.07(58.56)/0.0872s</td><td>77.77(52.77)/0.0610s</td></tr><tr><td>IBN-50</td><td>-</td><td>80.18(57.80)/0.1789s</td><td>81.28(58.27)/0.0863s</td><td>78.11(53.10)/0.0623s</td></tr><tr><td>IBN-34</td><td></td><td>-</td><td>78.27(53.41)/0.0873s</td><td>77.65(52.82)/0.0615s</td></tr><tr><td>IBN-18</td><td>-</td><td>-</td><td>-</td><td>74.11(47.26)/0.0621s</td></tr><tr><td colspan="2">Average Q.Time</td><td>0.2693s</td><td>0.1801s</td><td>0.0868s</td><td>0.0617s</td></tr></tbody></table>

## Contact
This project is conducted by [Guan'an Wang](https://wangguanan.github.io/) (guan.wang0706@gmail) and [Xingyu Liao](https://github.com/L1aoXingyu).
@@ -1,30 +0,0 @@
_BASE_: "../../../configs/Base-bagtricks.yml"

MODEL_TEACHER:
  META_ARCHITECTURE: "Baseline"

  BACKBONE:
    NAME: "build_resnet_backbone"
    NORM: "BN"
    DEPTH: "101x"
    FEAT_DIM: 2048
    LAST_STRIDE: 1
    WITH_IBN: True
    PRETRAIN: True

  HEADS:
    NAME: "EmbeddingHead"
    NORM: "BN"
    POOL_LAYER: "avgpool"
    NECK_FEAT: "before"
    CLS_LAYER: "linear"

MODEL:
  BACKBONE:
    NAME: "build_resnet_backbone"
    DEPTH: "50x"
    FEAT_DIM: 2048
    WITH_IBN: True

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "logs/dukemtmc/bagtricks_R34-ibn/model_final.pth"

@@ -1,37 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"

MODEL_TEACHER:
  META_ARCHITECTURE: "Baseline"

  BACKBONE:
    NAME: "build_resnet_backbone"
    NORM: "BN"
    DEPTH: "101x"
    FEAT_DIM: 2048
    LAST_STRIDE: 1
    WITH_NL: False
    WITH_IBN: True
    PRETRAIN: True

  HEADS:
    NAME: "EmbeddingHead"
    NORM: "BN"
    NECK_FEAT: "after"
    POOL_LAYER: "gempoolP"
    CLS_LAYER: "circleSoftmax"
    SCALE: 64
    MARGIN: 0.35

MODEL:
  BACKBONE:
    NAME: "build_resnet_backbone"
    DEPTH: "50x"
    FEAT_DIM: 2048
    WITH_IBN: True

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "logs/dukemtmc/bagtricks_R34-ibn/model_final.pth"

INPUT:
  SIZE_TRAIN: [ 256, 128 ]
  SIZE_TEST: [ 256, 128 ]
@@ -1,20 +0,0 @@
_BASE_: "../Base-bot-kd.yml"

MODEL_TEACHER:
  BACKBONE:
    DEPTH: "101x"
    FEAT_DIM: 2048

MODEL:
  BACKBONE:
    DEPTH: "18x"
    FEAT_DIM: 512

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/bagtricks_R101-ibn"

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bot101ibn-kd-bot18ibn"

@@ -1,20 +0,0 @@
_BASE_: "../Base-bot-kd.yml"

MODEL_TEACHER:
  BACKBONE:
    DEPTH: "101x"
    FEAT_DIM: 2048

MODEL:
  BACKBONE:
    DEPTH: "50x"
    FEAT_DIM: 2048

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/bagtricks_R101-ibn"

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/dukemtmc/bot101ibn-kd-bot50ibn"

@@ -1,20 +0,0 @@
_BASE_: "../Base-bot-kd.yml"

MODEL_TEACHER:
  BACKBONE:
    DEPTH: "50x"
    FEAT_DIM: 2048

MODEL:
  BACKBONE:
    DEPTH: "18x"
    FEAT_DIM: 512

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/bagtricks_R50-ibn/model_final.pth"

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bot50ibn-kd-bot18ibn"

@@ -1,20 +0,0 @@
_BASE_: "../Base-sbs-kd.yml"

MODEL_TEACHER:
  BACKBONE:
    DEPTH: "101x"
    FEAT_DIM: 2048

MODEL:
  BACKBONE:
    DEPTH: "34x"
    FEAT_DIM: 512

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/sbs_R101-ibn/model_final.pth"

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs101ibn-kd-sbs18ibn"

@@ -1,20 +0,0 @@
_BASE_: "../Base-sbs-kd.yml"

MODEL_TEACHER:
  BACKBONE:
    DEPTH: "101x"
    FEAT_DIM: 2048

MODEL:
  BACKBONE:
    DEPTH: "50x"
    FEAT_DIM: 2048

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/sbs_R101-ibn/model_final.pth"

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs101ibn-kd-sbs50ibn"

@@ -1,20 +0,0 @@
_BASE_: "../Base-sbs-kd.yml"

MODEL_TEACHER:
  BACKBONE:
    DEPTH: "50x"
    FEAT_DIM: 2048

MODEL:
  BACKBONE:
    DEPTH: "18x"
    FEAT_DIM: 512

  STUDENT_WEIGHTS: ""
  TEACHER_WEIGHTS: "projects/DistillReID/logs/dukemtmc/sbs_R50-ibn/model_final.pth"

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs50ibn-kd-sbs18ibn"
@@ -1,12 +0,0 @@
_BASE_: "../../../../configs/Base-bagtricks.yml"

MODEL:
  BACKBONE:
    DEPTH: "101x"
    WITH_IBN: True

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bagtricks_R101-ibn"

@@ -1,13 +0,0 @@
_BASE_: "../../../../configs/Base-bagtricks.yml"

MODEL:
  BACKBONE:
    DEPTH: "18x"
    WITH_IBN: True
    FEAT_DIM: 512

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bagtricks_R18-ibn"

@@ -1,12 +0,0 @@
_BASE_: "../../../../configs/Base-bagtricks.yml"

MODEL:
  BACKBONE:
    DEPTH: "50x"
    WITH_IBN: True

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/bagtricks_R50-ibn"

@@ -1,13 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"

MODEL:
  BACKBONE:
    DEPTH: "101x"
    WITH_IBN: True
    FEAT_DIM: 2048

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs_R101-ibn"

@@ -1,13 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"

MODEL:
  BACKBONE:
    DEPTH: "18x"
    WITH_IBN: True
    FEAT_DIM: 512

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs_R18-ibn"

@@ -1,13 +0,0 @@
_BASE_: "../../../configs/Base-Strongerbaseline.yml"

MODEL:
  BACKBONE:
    DEPTH: "50x"
    WITH_IBN: True
    FEAT_DIM: 2048

DATASETS:
  NAMES: ("DukeMTMC",)
  TESTS: ("DukeMTMC",)

OUTPUT_DIR: "projects/DistillReID/logs/dukemtmc/sbs_R50-ibn"
@@ -1,9 +0,0 @@
# encoding: utf-8
"""
@author: l1aoxingyu
@contact: sherlockliao01@gmail.com
"""

from .config import add_kdreid_config, add_shufflenet_config
from .kd_trainer import KDTrainer
from .modeling import build_shufflenetv2_backbone
@@ -1,105 +0,0 @@
# encoding: utf-8
"""
@author: l1aoxingyu, guan'an wang
@contact: sherlockliao01@gmail.com, guan.wang0706@gmail.com
"""

from fastreid.config import CfgNode as CN


def add_shufflenet_config(cfg):
    _C = cfg
    _C.MODEL.BACKBONE.MODEL_SIZE = '1.0x'


def add_kdreid_config(cfg):
    _C = cfg

    _C.MODEL_TEACHER = CN()
    _C.MODEL_TEACHER.META_ARCHITECTURE = 'Baseline'

    # ---------------------------------------------------------------------------- #
    # teacher model Backbone options
    # ---------------------------------------------------------------------------- #
    _C.MODEL_TEACHER.BACKBONE = CN()

    _C.MODEL_TEACHER.BACKBONE.NAME = "build_resnet_backbone"
    _C.MODEL_TEACHER.BACKBONE.DEPTH = "50x"
    _C.MODEL_TEACHER.BACKBONE.LAST_STRIDE = 1
    # If use IBN block in backbone
    _C.MODEL_TEACHER.BACKBONE.WITH_IBN = False
    # If use SE block in backbone
    _C.MODEL_TEACHER.BACKBONE.WITH_SE = False
    # If use Non-local block in backbone
    _C.MODEL_TEACHER.BACKBONE.WITH_NL = False
    # Input feature dimension
    _C.MODEL_TEACHER.BACKBONE.FEAT_DIM = 2048

    # for shufflenet
    _C.MODEL_TEACHER.BACKBONE.MODEL_SIZE = '1.0x'

    _C.MODEL_TEACHER.BACKBONE.NORM = 'BN'
    _C.MODEL_TEACHER.BACKBONE.PRETRAIN = False

    # ---------------------------------------------------------------------------- #
    # teacher model HEADS options
    # ---------------------------------------------------------------------------- #
    _C.MODEL_TEACHER.HEADS = CN()
    _C.MODEL_TEACHER.HEADS.NAME = "EmbeddingHead"

    # Pooling layer type
    _C.MODEL_TEACHER.HEADS.POOL_LAYER = "avgpool"
    _C.MODEL_TEACHER.HEADS.NECK_FEAT = "before"
    _C.MODEL_TEACHER.HEADS.CLS_LAYER = "linear"

    # Pretrained teacher and student model weights
    _C.MODEL.TEACHER_WEIGHTS = ""
    _C.MODEL.STUDENT_WEIGHTS = ""

    _C.MODEL_TEACHER.HEADS.NORM = 'BN'
    _C.MODEL_TEACHER.HEADS.SCALE = 64
    _C.MODEL_TEACHER.HEADS.MARGIN = 0.35


def update_model_teacher_config(cfg):
    cfg = cfg.clone()

    frozen = cfg.is_frozen()

    cfg.defrost()
    cfg.MODEL.META_ARCHITECTURE = cfg.MODEL_TEACHER.META_ARCHITECTURE
    # ---------------------------------------------------------------------------- #
    # teacher model Backbone options
    # ---------------------------------------------------------------------------- #
    cfg.MODEL.BACKBONE.NAME = cfg.MODEL_TEACHER.BACKBONE.NAME
    cfg.MODEL.BACKBONE.DEPTH = cfg.MODEL_TEACHER.BACKBONE.DEPTH
    cfg.MODEL.BACKBONE.LAST_STRIDE = cfg.MODEL_TEACHER.BACKBONE.LAST_STRIDE
    # If use IBN block in backbone
    cfg.MODEL.BACKBONE.WITH_IBN = cfg.MODEL_TEACHER.BACKBONE.WITH_IBN
    # If use SE block in backbone
    cfg.MODEL.BACKBONE.WITH_SE = cfg.MODEL_TEACHER.BACKBONE.WITH_SE
    # If use Non-local block in backbone
    cfg.MODEL.BACKBONE.WITH_NL = cfg.MODEL_TEACHER.BACKBONE.WITH_NL
    # Input feature dimension
    cfg.MODEL.BACKBONE.FEAT_DIM = cfg.MODEL_TEACHER.BACKBONE.FEAT_DIM
    cfg.MODEL.BACKBONE.PRETRAIN = False

    # for shufflenet
    cfg.MODEL.BACKBONE.MODEL_SIZE = cfg.MODEL_TEACHER.BACKBONE.MODEL_SIZE

    # ---------------------------------------------------------------------------- #
    # teacher model HEADS options
    # ---------------------------------------------------------------------------- #
    cfg.MODEL.HEADS.NAME = cfg.MODEL_TEACHER.HEADS.NAME

    # Pooling layer type
    cfg.MODEL.HEADS.POOL_LAYER = cfg.MODEL_TEACHER.HEADS.POOL_LAYER

    cfg.MODEL.HEADS.SCALE = cfg.MODEL_TEACHER.HEADS.SCALE
    cfg.MODEL.HEADS.MARGIN = cfg.MODEL_TEACHER.HEADS.MARGIN

    if frozen: cfg.freeze()

    return cfg
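A short, hypothetical sketch of how the helpers above fit together when constructing the frozen teacher; it mirrors what `KDTrainer.build_model_teacher` does later in this diff, and assumes a fastreid `cfg` with the `MODEL_TEACHER` node already added:

```python
from fastreid.modeling.meta_arch import build_model

cfg_t = update_model_teacher_config(cfg)   # copy MODEL_TEACHER.* over MODEL.*
teacher = build_model(cfg_t)
for p in teacher.parameters():
    p.requires_grad = False                # the teacher stays frozen during KD
```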
@@ -1,139 +0,0 @@
# encoding: utf-8
"""
@author: l1aoxingyu
@contact: sherlockliao01@gmail.com
"""

import logging
import time

import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.parallel import DistributedDataParallel

from fastreid.engine import DefaultTrainer
from fastreid.utils.file_io import PathManager
from fastreid.modeling.meta_arch import build_model
from fastreid.utils.checkpoint import Checkpointer
from .config import update_model_teacher_config


class KDTrainer(DefaultTrainer):
    """
    A knowledge distillation trainer for the person re-ID task.
    """

    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode):
        """
        super().__init__(cfg)

        model_t = self.build_model_teacher(self.cfg)
        for param in model_t.parameters():
            param.requires_grad = False

        logger = logging.getLogger('fastreid.' + __name__)

        # Load pre-trained teacher model
        logger.info("Loading teacher model ...")
        Checkpointer(model_t).load(cfg.MODEL.TEACHER_WEIGHTS)

        if PathManager.exists(cfg.MODEL.STUDENT_WEIGHTS):
            logger.info("Loading student model ...")
            Checkpointer(self.model).load(cfg.MODEL.STUDENT_WEIGHTS)
        else:
            logger.info("No student model checkpoints")

        self.model_t = model_t

    def run_step(self):
        """
        Implement the knowledge distillation training logic described above.
        """
        assert self.model.training, "[KDTrainer] base model was changed to eval mode!"
        start = time.perf_counter()
        """
        If you want to do something with the data, you can wrap the dataloader.
        """
        data = next(self._data_loader_iter)

        data_time = time.perf_counter() - start

        outs = self.model(data)

        # Compute reid loss
        if isinstance(self.model, DistributedDataParallel):
            loss_dict = self.model.module.losses(outs)
        else:
            loss_dict = self.model.losses(outs)

        with torch.no_grad():
            outs_t = self.model_t(data)

        q_logits = outs["outputs"]["pred_class_logits"]
        t_logits = outs_t["outputs"]["pred_class_logits"].detach()
        loss_dict['loss_kl'] = self.distill_loss(q_logits, t_logits, t=16)

        losses = sum(loss_dict.values())

        with torch.cuda.stream(torch.cuda.Stream()):
            metrics_dict = loss_dict
            metrics_dict["data_time"] = data_time
            self._write_metrics(metrics_dict)
            self._detect_anomaly(losses, loss_dict)

        """
        If you need to accumulate gradients or do something similar, you can
        wrap the optimizer with your custom `zero_grad()` method.
        """
        self.optimizer.zero_grad()
        losses.backward()

        """
        If you need gradient clipping/scaling or other processing, you can
        wrap the optimizer with your custom `step()` method.
        """
        self.optimizer.step()

    @classmethod
    def build_model_teacher(cls, cfg) -> nn.Module:
        cfg_t = update_model_teacher_config(cfg)
        model_t = build_model(cfg_t)
        return model_t

    @staticmethod
    def pkt_loss(output_net, target_net, eps=0.0000001):
        # Normalize each vector by its norm
        output_net_norm = torch.sqrt(torch.sum(output_net ** 2, dim=1, keepdim=True))
        output_net = output_net / (output_net_norm + eps)
        output_net[output_net != output_net] = 0

        target_net_norm = torch.sqrt(torch.sum(target_net ** 2, dim=1, keepdim=True))
        target_net = target_net / (target_net_norm + eps)
        target_net[target_net != target_net] = 0

        # Calculate the cosine similarity
        model_similarity = torch.mm(output_net, output_net.transpose(0, 1))
        target_similarity = torch.mm(target_net, target_net.transpose(0, 1))

        # Scale cosine similarity to 0..1
        model_similarity = (model_similarity + 1.0) / 2.0
        target_similarity = (target_similarity + 1.0) / 2.0

        # Transform them into probabilities
        model_similarity = model_similarity / torch.sum(model_similarity, dim=1, keepdim=True)
        target_similarity = target_similarity / torch.sum(target_similarity, dim=1, keepdim=True)

        # Calculate the KL-divergence
        loss = torch.mean(target_similarity * torch.log((target_similarity + eps) / (model_similarity + eps)))
        return loss

    @staticmethod
    def distill_loss(y_s, y_t, t=4):
        p_s = F.log_softmax(y_s / t, dim=1)
        p_t = F.softmax(y_t / t, dim=1)
        loss = F.kl_div(p_s, p_t, reduction='sum') * (t ** 2) / y_s.shape[0]
        return loss
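A tiny sanity check of `distill_loss` above; the shapes and temperature are illustrative assumptions. Identical student and teacher logits yield a KL divergence of zero, while mismatched logits yield a positive loss:

```python
import torch

# Illustrative shapes: a batch of 8 samples over 100 classes.
y_t = torch.randn(8, 100)
print(KDTrainer.distill_loss(y_t, y_t, t=16).item())                  # ~0.0
print(KDTrainer.distill_loss(torch.randn(8, 100), y_t, t=16).item())  # > 0
```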
@@ -1 +0,0 @@
from .backbones import build_shufflenetv2_backbone

@@ -1 +0,0 @@
from .shufflenetv2 import build_shufflenetv2_backbone
@@ -1,43 +0,0 @@
import torch
import torch.nn as nn
from collections import OrderedDict

from fastreid.modeling.backbones.build import BACKBONE_REGISTRY
from .network import ShuffleNetV2


__all__ = ['build_shufflenetv2_backbone']


@BACKBONE_REGISTRY.register()
def build_shufflenetv2_backbone(cfg):
    pretrain = cfg.MODEL.BACKBONE.PRETRAIN
    pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
    model_size = cfg.MODEL.BACKBONE.MODEL_SIZE

    return ShuffleNetV2Backbone(model_size=model_size, pretrained=pretrain, pretrain_path=pretrain_path)


class ShuffleNetV2Backbone(nn.Module):

    def __init__(self, model_size, pretrained=False, pretrain_path=''):
        super(ShuffleNetV2Backbone, self).__init__()

        model = ShuffleNetV2(model_size=model_size)
        if pretrained:
            new_state_dict = OrderedDict()
            state_dict = torch.load(pretrain_path)['state_dict']
            for k, v in state_dict.items():
                if k[:7] == 'module.':
                    k = k[7:]
                new_state_dict[k] = v
            model.load_state_dict(new_state_dict, strict=True)

        self.backbone = nn.Sequential(
            model.first_conv, model.maxpool, model.features, model.conv_last)

    def forward(self, x):
        return self.backbone(x)