Merge pull request #2 from Jiahao000/xjh_dev

[Refactor] refactor models, benchmarks, tools and other miscs, etc.
2025-06-03 14:59:38 +08:00 · 2021-12-15 18:56:38 +08:00 · 2021-12-15 18:56:38 +08:00 · 00e51990fb
commit 00e51990fb
parent 86bf9bce47 dfaa8215ae
166 changed files with 893 additions and 9098 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,6 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
 **/*.pyc
 # C extensions
 *.so
@ -103,22 +104,16 @@ venv.bak/
 # mypy
 .mypy_cache/
-openselfsup/version.py
+# custom
-version.py
+/data
 data
 .vscode
 .idea
 # custom
 *.pkl
 *.pkl.json
 *.log.json
 work_dirs/
 /mmselfsup/.mim
 pretrains
 # Pytorch
 *.pth
 *.swp
 source.sh
 tensorboard.sh
@ -126,3 +121,6 @@ tensorboard.sh
 replace.sh
 benchmarks/detection/datasets
 benchmarks/detection/output
 # Pytorch
 *.pth
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,46 @@
 exclude: ^tests/data/
 repos:
  - repo: https://gitlab.com/pycqa/flake8.git
    rev: 3.8.3
    hooks:
      - id: flake8
  - repo: https://github.com/asottile/seed-isort-config
    rev: v2.2.0
    hooks:
      - id: seed-isort-config
  - repo: https://github.com/timothycrosley/isort
    rev: 4.3.21
    hooks:
      - id: isort
  - repo: https://github.com/pre-commit/mirrors-yapf
    rev: v0.30.0
    hooks:
      - id: yapf
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.1.0
    hooks:
      - id: trailing-whitespace
      - id: check-yaml
        exclude: configs/benchmarks/detectron2/Base-RetinaNet.yaml
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: double-quote-string-fixer
      - id: check-merge-conflict
      - id: fix-encoding-pragma
        args: ["--remove"]
      - id: mixed-line-ending
        args: ["--fix=lf"]
  - repo: https://github.com/markdownlint/markdownlint
    rev: v0.11.0
    hooks:
      - id: markdownlint
        args: ["-r", "~MD002,~MD013,~MD024,~MD029,~MD033,~MD034,~MD036", "-t", "allow_different_nesting"]
  - repo: https://github.com/codespell-project/codespell
    rev: v2.1.0
    hooks:
      - id: codespell
  - repo: https://github.com/myint/docformatter
    rev: v1.3.1
    hooks:
      - id: docformatter
        args: ["--in-place", "--wrap-descriptions", "79"]
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@ -0,0 +1,9 @@
 version: 2
 formats: all
 python:
  version: 3.7
  install:
    - requirements: requirements/docs.txt
    - requirements: requirements/readthedocs.txt
--- a/benchmarks/detection/README.md
+++ b/benchmarks/detection/README.md
@ -1,12 +0,0 @@
 ## Transferring to Detection
 We follow the evaluation setting in MoCo when transferring to object detection.
 ### Instruction
 1. Install [detectron2](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md).
 1. Put dataset under "benchmarks/detection/datasets" directory,
   following the [directory structure](https://github.com/facebookresearch/detectron2/tree/master/datasets)
   required by detectron2.
--- a/benchmarks/detection/configs/Base-Keypoint-RCNN-FPN.yaml
+++ b/benchmarks/detection/configs/Base-Keypoint-RCNN-FPN.yaml
@ -1,15 +0,0 @@
 _BASE_: "Base-RCNN-FPN.yaml"
 MODEL:
  KEYPOINT_ON: True
  ROI_HEADS:
    NUM_CLASSES: 1
  ROI_BOX_HEAD:
    SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
  RPN:
    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
    # 1000 proposals per-image is found to hurt box AP.
    # Therefore we increase it to 1500 per-image.
    POST_NMS_TOPK_TRAIN: 1500
 DATASETS:
  TRAIN: ("keypoints_coco_2017_train",)
  TEST: ("keypoints_coco_2017_val",)
--- a/benchmarks/detection/configs/Base-RCNN-C4-BN.yaml
+++ b/benchmarks/detection/configs/Base-RCNN-C4-BN.yaml
@ -1,17 +0,0 @@
 MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  RPN:
    PRE_NMS_TOPK_TEST: 6000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "Res5ROIHeadsExtraNorm"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    NORM: "SyncBN"
 TEST:
  PRECISE_BN:
    ENABLED: True
 SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
--- a/benchmarks/detection/configs/Base-RCNN-FPN.yaml
+++ b/benchmarks/detection/configs/Base-RCNN-FPN.yaml
@ -1,42 +0,0 @@
 MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
 DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
 SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
 INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
 VERSION: 2
--- a/benchmarks/detection/configs/Base-RetinaNet.yaml
+++ b/benchmarks/detection/configs/Base-RetinaNet.yaml
@ -1,25 +0,0 @@
 MODEL:
  META_ARCHITECTURE: "RetinaNet"
  BACKBONE:
    NAME: "build_retinanet_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
  FPN:
    IN_FEATURES: ["res3", "res4", "res5"]
  RETINANET:
    IOU_THRESHOLDS: [0.4, 0.5]
    IOU_LABELS: [0, -1, 1]
    SMOOTH_L1_LOSS_BETA: 0.0
 DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
 SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
  STEPS: (60000, 80000)
  MAX_ITER: 90000
 INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
 VERSION: 2
--- a/benchmarks/detection/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
+++ b/benchmarks/detection/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
@ -1,30 +0,0 @@
 _BASE_: "../Base-RCNN-FPN.yaml"
 MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  ROI_HEADS:
    NUM_CLASSES: 8
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
 INPUT:
  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 2048
  MAX_SIZE_TEST: 2048
 DATASETS:
  TRAIN: ("cityscapes_fine_instance_seg_train",)
  TEST: ("cityscapes_fine_instance_seg_val",)
 SOLVER:
  BASE_LR: 0.01
  STEPS: (18000,)
  MAX_ITER: 24000
  IMS_PER_BATCH: 8
 TEST:
  PRECISE_BN:
    ENABLED: True
--- a/benchmarks/detection/configs/Cityscapes/mask_rcnn_R_50_FPN_moco.yaml
+++ b/benchmarks/detection/configs/Cityscapes/mask_rcnn_R_50_FPN_moco.yaml
@ -1,9 +0,0 @@
 _BASE_: "mask_rcnn_R_50_FPN.yaml"
 MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
 INPUT:
  FORMAT: "RGB"
--- a/benchmarks/detection/configs/coco_R_50_C4_1x.yaml
+++ b/benchmarks/detection/configs/coco_R_50_C4_1x.yaml
@ -1,4 +0,0 @@
 _BASE_: "coco_R_50_C4_2x.yaml"
 SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--- a/benchmarks/detection/configs/coco_R_50_C4_1x_moco.yaml
+++ b/benchmarks/detection/configs/coco_R_50_C4_1x_moco.yaml
@ -1,4 +0,0 @@
 _BASE_: "coco_R_50_C4_2x_moco.yaml"
 SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--- a/benchmarks/detection/configs/coco_R_50_C4_2x.yaml
+++ b/benchmarks/detection/configs/coco_R_50_C4_2x.yaml
@ -1,13 +0,0 @@
 _BASE_: "Base-RCNN-C4-BN.yaml"
 MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
 DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
 SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
--- a/benchmarks/detection/configs/coco_R_50_C4_2x_moco.yaml
+++ b/benchmarks/detection/configs/coco_R_50_C4_2x_moco.yaml
@ -1,10 +0,0 @@
 _BASE_: "coco_R_50_C4_2x.yaml"
 MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
 INPUT:
  MAX_SIZE_TRAIN: 1200
  FORMAT: "RGB"
--- a/benchmarks/detection/configs/coco_R_50_FPN_1x.yaml
+++ b/benchmarks/detection/configs/coco_R_50_FPN_1x.yaml
@ -1,17 +0,0 @@
 _BASE_: "Base-RCNN-FPN.yaml"
 MODEL:
  MASK_ON: True
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
 TEST:
  PRECISE_BN:
    ENABLED: True
 SOLVER:
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--- a/benchmarks/detection/configs/coco_R_50_FPN_1x_moco.yaml
+++ b/benchmarks/detection/configs/coco_R_50_FPN_1x_moco.yaml
@ -1,9 +0,0 @@
 _BASE_: "coco_R_50_FPN_1x.yaml"
 MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
 INPUT:
  FORMAT: "RGB"
--- a/benchmarks/detection/configs/coco_R_50_FPN_2x.yaml
+++ b/benchmarks/detection/configs/coco_R_50_FPN_2x.yaml
@ -1,4 +0,0 @@
 _BASE_: "coco_R_50_FPN_1x.yaml"
 SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
--- a/benchmarks/detection/configs/coco_R_50_FPN_2x_moco.yaml
+++ b/benchmarks/detection/configs/coco_R_50_FPN_2x_moco.yaml
@ -1,4 +0,0 @@
 _BASE_: "coco_R_50_FPN_1x_moco.yaml"
 SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
--- a/benchmarks/detection/configs/coco_R_50_RetinaNet_1x.yaml
+++ b/benchmarks/detection/configs/coco_R_50_RetinaNet_1x.yaml
@ -1,13 +0,0 @@
 _BASE_: "Base-RetinaNet.yaml"
 MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
 TEST:
  PRECISE_BN:
    ENABLED: True
--- a/benchmarks/detection/configs/coco_R_50_RetinaNet_1x_moco.yaml
+++ b/benchmarks/detection/configs/coco_R_50_RetinaNet_1x_moco.yaml
@ -1,9 +0,0 @@
 _BASE_: "coco_R_50_RetinaNet_1x.yaml"
 MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
 INPUT:
  FORMAT: "RGB"
--- a/benchmarks/detection/configs/coco_R_50_RetinaNet_2x.yaml
+++ b/benchmarks/detection/configs/coco_R_50_RetinaNet_2x.yaml
@ -1,4 +0,0 @@
 _BASE_: "coco_R_50_RetinaNet_1x.yaml"
 SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
--- a/benchmarks/detection/configs/coco_R_50_RetinaNet_2x_moco.yaml
+++ b/benchmarks/detection/configs/coco_R_50_RetinaNet_2x_moco.yaml
@ -1,4 +0,0 @@
 _BASE_: "coco_R_50_RetinaNet_1x_moco.yaml"
 SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
--- a/benchmarks/detection/configs/keypoint_rcnn_R_50_FPN_2x.yaml
+++ b/benchmarks/detection/configs/keypoint_rcnn_R_50_FPN_2x.yaml
@ -1,16 +0,0 @@
 _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
 MODEL:
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  BACKBONE:
    FREEZE_AT: 0
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  FPN:
    NORM: "SyncBN"
 TEST:
  PRECISE_BN:
    ENABLED: True
 SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000
--- a/benchmarks/detection/configs/keypoint_rcnn_R_50_FPN_2x_moco.yaml
+++ b/benchmarks/detection/configs/keypoint_rcnn_R_50_FPN_2x_moco.yaml
@ -1,9 +0,0 @@
 _BASE_: "keypoint_rcnn_R_50_FPN_2x.yaml"
 MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
 INPUT:
  FORMAT: "RGB"
--- a/benchmarks/detection/configs/pascal_voc_R_50_C4_24k.yaml
+++ b/benchmarks/detection/configs/pascal_voc_R_50_C4_24k.yaml
@ -1,16 +0,0 @@
 _BASE_: "Base-RCNN-C4-BN.yaml"
 MODEL:
  MASK_ON: False
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  ROI_HEADS:
    NUM_CLASSES: 20
 INPUT:
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  MIN_SIZE_TEST: 800
 DATASETS:
  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
  TEST: ('voc_2007_test',)
 SOLVER:
  STEPS: (18000, 22000)
  MAX_ITER: 24000
  WARMUP_ITERS: 100
--- a/benchmarks/detection/configs/pascal_voc_R_50_C4_24k_moco.yaml
+++ b/benchmarks/detection/configs/pascal_voc_R_50_C4_24k_moco.yaml
@ -1,9 +0,0 @@
 _BASE_: "pascal_voc_R_50_C4_24k.yaml"
 MODEL:
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  WEIGHTS: "See Instructions"
  RESNETS:
    STRIDE_IN_1X1: False
 INPUT:
  FORMAT: "RGB"
--- a/benchmarks/detection/convert-pretrain-to-detectron2.py
+++ b/benchmarks/detection/convert-pretrain-to-detectron2.py
@ -1,36 +0,0 @@
 #!/usr/bin/env python
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 import pickle as pkl
 import sys
 import torch
 if __name__ == "__main__":
    # Usage: convert-pretrain-to-detectron2.py INPUT_CHECKPOINT OUTPUT.pkl
    #
    # Rewrites the keys of the "state_dict" stored in INPUT_CHECKPOINT and
    # pickles the result to OUTPUT.pkl.
    #
    # Check the command line first so that a missing argument or a bad output
    # name fails before the checkpoint is loaded and converted.
    if len(sys.argv) < 3:
        sys.exit("usage: {} INPUT_CHECKPOINT OUTPUT.pkl".format(sys.argv[0]))
    src_path, dst_path = sys.argv[1], sys.argv[2]
    if not dst_path.endswith(".pkl"):
        sys.exit("output path must end with .pkl, got: {}".format(dst_path))
    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    checkpoint = torch.load(src_path, map_location="cpu")
    state_dict = checkpoint["state_dict"]
    newmodel = {}
    for old_k, v in state_dict.items():
        k = old_k
        # Any key without "layer" in its name is prefixed with "stem.".
        if "layer" not in k:
            k = "stem." + k
        # layer1..layer4 -> res2..res5
        for t in [1, 2, 3, 4]:
            k = k.replace("layer{}".format(t), "res{}".format(t + 1))
        # bnN -> convN.norm
        for t in [1, 2, 3]:
            k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
        # downsample.0 / downsample.1 -> shortcut / shortcut.norm
        k = k.replace("downsample.0", "shortcut")
        k = k.replace("downsample.1", "shortcut.norm")
        print(old_k, "->", k)
        newmodel[k] = v.numpy()
    res = {
        "model": newmodel,
        "__author__": "OpenSelfSup",
        "matching_heuristics": True
    }
    with open(dst_path, "wb") as f:
        pkl.dump(res, f)
--- a/benchmarks/detection/run.sh
+++ b/benchmarks/detection/run.sh
@ -1,6 +0,0 @@
 #!/bin/bash
 # Launch train_net.py (located next to this script) on 8 GPUs.
 # Usage: run.sh DET_CFG WEIGHTS
 #   DET_CFG  config file passed to --config-file
 #   WEIGHTS  value assigned to MODEL.WEIGHTS
 DET_CFG=$1
 WEIGHTS=$2
 if [ -z "$DET_CFG" ] || [ -z "$WEIGHTS" ]; then
    echo "Usage: $0 DET_CFG WEIGHTS" >&2
    exit 1
 fi
 # Quote every expansion so paths containing spaces survive word splitting.
 python "$(dirname "$0")/train_net.py" --config-file "$DET_CFG" \
    --num-gpus 8 MODEL.WEIGHTS "$WEIGHTS"
--- a/benchmarks/detection/train_net.py
+++ b/benchmarks/detection/train_net.py
@ -1,77 +0,0 @@
 #!/usr/bin/env python
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 import os
 from detectron2.checkpoint import DetectionCheckpointer
 from detectron2.config import get_cfg
 from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
 from detectron2.evaluation import COCOEvaluator, PascalVOCDetectionEvaluator
 from detectron2.layers import get_norm
 from detectron2.modeling.roi_heads import ROI_HEADS_REGISTRY, Res5ROIHeads
@ROI_HEADS_REGISTRY.register()
class Res5ROIHeadsExtraNorm(Res5ROIHeads):
    """Res5ROIHeads variant with one more norm layer appended to res5.

    As described in the MOCO paper, there is an extra BN layer following the
    res5 stage.
    """
    def _build_res5_block(self, cfg):
        """Return the parent's res5 block with a trailing "norm" module.

        The norm type is read from ``cfg.MODEL.RESNETS.NORM`` and sized to the
        block's output channels; the channel count is returned unchanged.
        """
        block, channels = super()._build_res5_block(cfg)
        extra_norm = get_norm(cfg.MODEL.RESNETS.NORM, channels)
        block.add_module("norm", extra_norm)
        return block, channels
 class Trainer(DefaultTrainer):
    """DefaultTrainer that selects its evaluator from the dataset name."""
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCO or Pascal VOC evaluator for ``dataset_name``.

        ``output_folder`` defaults to ``<cfg.OUTPUT_DIR>/inference``. Names
        containing "coco" get a COCOEvaluator; any other name must contain
        "voc" (checked with an assertion) and gets a
        PascalVOCDetectionEvaluator.
        """
        out_dir = output_folder
        if out_dir is None:
            out_dir = os.path.join(cfg.OUTPUT_DIR, "inference")
        if "coco" not in dataset_name:
            assert "voc" in dataset_name
            return PascalVOCDetectionEvaluator(dataset_name)
        return COCOEvaluator(dataset_name, cfg, True, out_dir)
 def setup(args):
    """Build the frozen config for this run.

    Starts from ``get_cfg()``, merges ``args.config_file`` and then the
    ``args.opts`` overrides on top, freezes the result and hands it to
    ``default_setup`` before returning it.
    """
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    # Overrides from args.opts are applied after the file, so they win.
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg
 def main(args):
    """Run evaluation only (``args.eval_only``) or a full training run.

    Returns the result of ``Trainer.test`` in evaluation mode, otherwise the
    result of ``trainer.train()``.
    """
    cfg = setup(args)
    if not args.eval_only:
        trainer = Trainer(cfg)
        trainer.resume_or_load(resume=args.resume)
        return trainer.train()
    model = Trainer.build_model(cfg)
    checkpointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume)
    return Trainer.test(cfg, model)
 if __name__ == "__main__":
    # Parse the command line with detectron2's default parser, echo it, and
    # hand ``main`` to ``launch`` together with the GPU/machine settings.
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args, ),
    )
--- a/benchmarks/dist_test_svm_epoch.sh
+++ b/benchmarks/dist_test_svm_epoch.sh
@ -1,28 +0,0 @@
 #!/bin/bash
 # Extract features from a training checkpoint and run the full and low-shot
 # SVM evaluations on them.
 # Usage: dist_test_svm_epoch.sh CFG EPOCH [FEAT_LIST] [GPUS]
 set -e
 set -x
 CFG=$1
 EPOCH=$2
 FEAT_LIST=$3 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
 GPUS=${4:-8}
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs".
 WORK_DIR=$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/
 # A bare `exit` would return the status of the preceding echo (0), so the
 # error paths exit with an explicit non-zero status.
 if [ "$CFG" == "" ] || [ "$EPOCH" == "" ]; then
    echo "ERROR: Missing arguments."
    exit 1
 fi
 if [ ! -f $WORK_DIR/epoch_${EPOCH}.pth ]; then
    echo "ERROR: File not exist: $WORK_DIR/epoch_${EPOCH}.pth"
    exit 1
 fi
 mkdir -p $WORK_DIR/logs
 echo "Testing checkpoint: $WORK_DIR/epoch_${EPOCH}.pth" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
 bash tools/dist_extract.sh $CFG $GPUS $WORK_DIR --checkpoint $WORK_DIR/epoch_${EPOCH}.pth
 bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
 bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"
--- a/benchmarks/dist_test_svm_pretrain.sh
+++ b/benchmarks/dist_test_svm_pretrain.sh
@ -1,28 +0,0 @@
 #!/bin/bash
 # Extract features with a pre-trained model (or random init) and run the full
 # and low-shot SVM evaluations on them.
 # Usage: dist_test_svm_pretrain.sh CFG PRETRAIN [FEAT_LIST] [GPUS]
 set -e
 set -x
 CFG=$1
 PRETRAIN=$2 # pretrained model or "random" (random init)
 FEAT_LIST=$3 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
 GPUS=${4:-8}
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs",
 # followed by the last path component of PRETRAIN.
 WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
 # A bare `exit` would return the status of the preceding echo (0), so the
 # error paths exit with an explicit non-zero status.
 if [ "$CFG" == "" ] || [ "$PRETRAIN" == "" ]; then
    echo "ERROR: Missing arguments."
    exit 1
 fi
 if [ ! -f $PRETRAIN ] && [ "$PRETRAIN" != "random" ]; then
    echo "ERROR: PRETRAIN should be a file or a string \"random\", got: $PRETRAIN"
    exit 1
 fi
 mkdir -p $WORK_DIR/logs
 echo "Testing pretrain: $PRETRAIN" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
 bash tools/dist_extract.sh $CFG $GPUS $WORK_DIR --pretrained $PRETRAIN
 bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
 bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"
--- a/benchmarks/dist_train_linear.sh
+++ b/benchmarks/dist_train_linear.sh
@ -1,24 +0,0 @@
 #!/usr/bin/env bash
 # Launch tools/train.py with torch.distributed.launch on 8 GPUs, starting
 # from a pre-trained model.
 # Usage: [PORT=29500] dist_train_linear.sh CFG PRETRAIN [extra train.py args]
 set -e
 set -x
 CFG=$1 # use cfgs under "configs/benchmarks/linear_classification/"
 PRETRAIN=$2
 PY_ARGS=${@:3} # --resume_from --deterministic
 GPUS=8 # When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
 PORT=${PORT:-29500}
 if [ "$CFG" == "" ] || [ "$PRETRAIN" == "" ]; then
    echo "ERROR: Missing arguments."
    # A bare `exit` would report success (status of the echo above).
    exit 1
 fi
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs",
 # followed by the last path component of PRETRAIN.
 WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
 # train
 python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    tools/train.py \
    $CFG \
    --pretrained $PRETRAIN \
    --work_dir $WORK_DIR --seed 0 --launcher="pytorch" ${PY_ARGS}
--- a/benchmarks/dist_train_semi.sh
+++ b/benchmarks/dist_train_semi.sh
@ -1,24 +0,0 @@
 #!/usr/bin/env bash
 # Launch tools/train.py with torch.distributed.launch on 4 GPUs, starting
 # from a pre-trained model.
 # Usage: [PORT=29500] dist_train_semi.sh CFG PRETRAIN [extra train.py args]
 set -e
 set -x
 CFG=$1 # use cfgs under "configs/benchmarks/semi_classification/imagenet_*percent/"
 PRETRAIN=$2
 PY_ARGS=${@:3}
 GPUS=4 # in the standard setting, GPUS=4
 PORT=${PORT:-29500}
 if [ "$CFG" == "" ] || [ "$PRETRAIN" == "" ]; then
    echo "ERROR: Missing arguments."
    # A bare `exit` would report success (status of the echo above).
    exit 1
 fi
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs",
 # followed by the last path component of PRETRAIN.
 WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
 # train
 python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    tools/train.py \
    $CFG \
    --pretrained $PRETRAIN \
    --work_dir $WORK_DIR --seed 0 --launcher="pytorch" ${PY_ARGS}
--- a/benchmarks/extract_info/voc07.py
+++ b/benchmarks/extract_info/voc07.py
@ -1,20 +0,0 @@
 # Feature-extraction config for VOC2007.
 # Options shared by the data source below.
 data_source_cfg = dict(type='ImageList', memcached=False, mclient_path=None)
 data_root = "data/VOCdevkit/VOC2007/JPEGImages"
 data_all_list = "data/VOCdevkit/VOC2007/Lists/trainvaltest.txt"
 # NOTE(review): presumably the index at which the combined list is cut into
 # the two splits named in split_name -- confirm against the extraction tool.
 split_at = [5011]
 split_name = ['voc07_trainval', 'voc07_test']
 img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 data = dict(
    imgs_per_gpu=32,
    workers_per_gpu=2,
    extract=dict(
        type="ExtractDataset",
        data_source=dict(
            list_file=data_all_list, root=data_root, **data_source_cfg),
        # Two Resize steps in a row: size=256 first, then a fixed (224, 224).
        pipeline=[
            dict(type='Resize', size=256),
            dict(type='Resize', size=(224, 224)),
            dict(type='ToTensor'),
            dict(type='Normalize', **img_norm_cfg),
        ]))
--- a/benchmarks/srun_test_svm_epoch.sh
+++ b/benchmarks/srun_test_svm_epoch.sh
@ -1,24 +0,0 @@
 #!/usr/bin/env bash
 # Slurm variant: extract features from a training checkpoint and run the full
 # and low-shot SVM evaluations on them.
 # Usage: srun_test_svm_epoch.sh PARTITION CFG EPOCH [FEAT_LIST] [GPUS]
 set -e
 set -x
 PARTITION=$1
 CFG=$2
 EPOCH=$3
 FEAT_LIST=$4 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
 GPUS=${5:-8}
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs".
 WORK_DIR=$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/
 if [ ! -f $WORK_DIR/epoch_${EPOCH}.pth ]; then
    echo "ERROR: File not exist: $WORK_DIR/epoch_${EPOCH}.pth"
    # A bare `exit` would report success (status of the echo above).
    exit 1
 fi
 mkdir -p $WORK_DIR/logs
 echo "Testing checkpoint: $WORK_DIR/epoch_${EPOCH}.pth" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
 bash tools/srun_extract.sh $PARTITION $CFG $GPUS $WORK_DIR --checkpoint $WORK_DIR/epoch_${EPOCH}.pth
 srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
 srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"
--- a/benchmarks/srun_test_svm_pretrain.sh
+++ b/benchmarks/srun_test_svm_pretrain.sh
@ -1,24 +0,0 @@
 #!/usr/bin/env bash
 # Slurm variant: extract features with a pre-trained model (or random init)
 # and run the full and low-shot SVM evaluations on them.
 # Usage: srun_test_svm_pretrain.sh PARTITION CFG PRETRAIN [FEAT_LIST] [GPUS]
 set -e
 set -x
 PARTITION=$1
 CFG=$2
 PRETRAIN=$3 # pretrained model or "random" (random init)
 FEAT_LIST=$4 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
 GPUS=${5:-8}
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs",
 # followed by the last path component of PRETRAIN.
 WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
 # The two tests must be joined with `&&`: written as `] and [`, everything
 # after the first `]` is passed to `[` as extra arguments, the test errors
 # out, and this validation never triggers.
 if [ ! -f $PRETRAIN ] && [ "$PRETRAIN" != "random" ]; then
    echo "ERROR: PRETRAIN should be a file or a string \"random\", got: $PRETRAIN"
    # A bare `exit` would report success (status of the echo above).
    exit 1
 fi
 mkdir -p $WORK_DIR/logs
 echo "Testing pretrain: $PRETRAIN" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
 bash tools/srun_extract.sh $PARTITION $CFG $GPUS $WORK_DIR --pretrained $PRETRAIN
 srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
 srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"
--- a/benchmarks/srun_train_linear.sh
+++ b/benchmarks/srun_train_linear.sh
@ -1,31 +0,0 @@
 #!/usr/bin/env bash
 # Submit tools/train.py through srun (slurm launcher), starting from a
 # pre-trained model.
 # Usage: srun_train_linear.sh PARTITION CFG PRETRAIN [extra train.py args]
 # GPUS_PER_NODE, CPUS_PER_TASK and SRUN_ARGS can be overridden from the
 # environment.
 set -e
 set -x
 PARTITION=$1
 CFG=$2
 PRETRAIN=$3
 PY_ARGS=${@:4}
 JOB_NAME="openselfsup"
 GPUS=8 # When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
 GPUS_PER_NODE=${GPUS_PER_NODE:-8}
 CPUS_PER_TASK=${CPUS_PER_TASK:-5}
 SRUN_ARGS=${SRUN_ARGS:-""}
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs",
 # followed by the last path component of PRETRAIN.
 WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
 # train
 GLOG_vmodule=MemcachedClient=-1 \
 srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py \
        $CFG \
        --pretrained $PRETRAIN \
        --work_dir $WORK_DIR --seed 0 --launcher="slurm" ${PY_ARGS}
--- a/benchmarks/srun_train_semi.sh
+++ b/benchmarks/srun_train_semi.sh
@ -1,31 +0,0 @@
 #!/usr/bin/env bash
 # Submit tools/train.py through srun (slurm launcher), starting from a
 # pre-trained model.
 # Usage: srun_train_semi.sh PARTITION CFG PRETRAIN [extra train.py args]
 # GPUS_PER_NODE, CPUS_PER_TASK and SRUN_ARGS can be overridden from the
 # environment.
 set -e
 set -x
 PARTITION=$1
 CFG=$2
 PRETRAIN=$3
 PY_ARGS=${@:4}
 JOB_NAME="openselfsup"
 GPUS=4 # in the standard setting, GPUS=4
 GPUS_PER_NODE=${GPUS_PER_NODE:-4}
 CPUS_PER_TASK=${CPUS_PER_TASK:-5}
 SRUN_ARGS=${SRUN_ARGS:-""}
 # Work dir: CFG without its extension, with "configs" replaced by "work_dirs",
 # followed by the last path component of PRETRAIN.
 WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
 # train
 GLOG_vmodule=MemcachedClient=-1 \
 srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py \
        $CFG \
        --pretrained $PRETRAIN \
        --work_dir $WORK_DIR --seed 0 --launcher="slurm" ${PY_ARGS}
--- a/benchmarks/svm_tools/aggregate_low_shot_svm_stats.py
+++ b/benchmarks/svm_tools/aggregate_low_shot_svm_stats.py
@ -1,127 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 Aggregate the stats over various independent samples for low-shot svm training.
 Stats computed: mean, max, min, std
 Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
 shot samples.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import argparse
 import logging
 import numpy as np
 import os
 import sys
 # create the logger
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
 logger = logging.getLogger(__name__)
 def save_stats(output_dir, stat, output):
    """Save ``output`` as ``test_ap_<stat>.npy`` inside ``output_dir``."""
    target = os.path.join(output_dir, 'test_ap_' + format(stat) + '.npy')
    np.save(target, output)
 def aggregate_stats(opts):
    """Aggregate per-sample low-shot test APs into mean/min/max/std files.

    For every k in ``opts.k_values`` (comma separated) the files
    ``test_ap_sample<idx + 1>_k<k>.npy`` for each idx in ``opts.sample_inds``
    (comma separated) are loaded from ``opts.output_path`` and concatenated
    along axis 0. The column-wise mean, min, max and std over that stack form
    one row per k; the four row stacks are written back to
    ``opts.output_path`` as ``test_ap_{mean,min,max,std}.npy``.

    For each k the column with the highest mean is then logged as
    ``mean/min/max/std`` scaled by 100.

    Raises:
        ValueError: if none of the expected files exists for some k.
    """
    k_values = [int(val) for val in opts.k_values.split(",")]
    sample_inds = [int(val) for val in opts.sample_inds.split(",")]
    output_mean, output_max, output_min, output_std = [], [], [], []
    for k_low in k_values:
        k_val_output = []
        for sample_idx in sample_inds:
            # File names use sample_idx + 1.
            file_name = 'test_ap_sample{}_k{}.npy'.format(
                sample_idx + 1, k_low)
            filepath = os.path.join(opts.output_path, file_name)
            if os.path.exists(filepath):
                k_val_output.append(np.load(filepath, encoding='latin1'))
            else:
                logger.info('file does not exist: {}'.format(filepath))
        if not k_val_output:
            # np.concatenate would raise an unspecific ValueError on an empty
            # list; name the k-value and directory instead.
            raise ValueError(
                'no test AP files found for k={} in: {}'.format(
                    k_low, opts.output_path))
        k_val_output = np.concatenate(k_val_output, axis=0)
        num_cols = k_val_output.shape[1]
        # Each reduction is reshaped to a single row of num_cols entries.
        output_mean.append(
            np.mean(k_val_output, axis=0).reshape(-1, num_cols))
        output_min.append(np.min(k_val_output, axis=0).reshape(-1, num_cols))
        output_max.append(np.max(k_val_output, axis=0).reshape(-1, num_cols))
        output_std.append(np.std(k_val_output, axis=0).reshape(-1, num_cols))
    output_mean = np.concatenate(output_mean, axis=0)
    output_min = np.concatenate(output_min, axis=0)
    output_max = np.concatenate(output_max, axis=0)
    output_std = np.concatenate(output_std, axis=0)
    save_stats(opts.output_path, 'mean', output_mean)
    save_stats(opts.output_path, 'min', output_min)
    save_stats(opts.output_path, 'max', output_max)
    save_stats(opts.output_path, 'std', output_std)
    # Per row (one per k), report the column with the highest mean.
    argmax_cls = np.argmax(output_mean, axis=1)
    for idx, best in enumerate(argmax_cls):
        logger.info('mean/min/max/std: {} / {} / {} / {}'.format(
            round(100.0 * output_mean[idx, best], 2),
            round(100.0 * output_min[idx, best], 2),
            round(100.0 * output_max[idx, best], 2),
            round(100.0 * output_std[idx, best], 2),
        ))
 def main():
    """Parse the command line and run ``aggregate_stats``.

    Prints the help text and exits with status 1 when called without any
    argument.
    """
    parser = argparse.ArgumentParser(description='Low shot SVM model test')
    # All three options are plain strings defaulting to None.
    option_help = (
        ('--output_path', "Numpy file containing test AP result files"),
        ('--k_values', "Low-shot k-values for svm testing. Comma separated"),
        ('--sample_inds',
         "sample_inds for which to test svm. Comma separated"),
    )
    for flag, text in option_help:
        parser.add_argument(flag, type=str, default=None, help=text)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    aggregate_stats(parser.parse_args())
 if __name__ == '__main__':
    main()
--- a/benchmarks/svm_tools/eval_svm_full.sh
+++ b/benchmarks/svm_tools/eval_svm_full.sh
@ -1,40 +0,0 @@
 #!/bin/bash
 # Train and test VOC07 SVMs on extracted features, once per feature name.
 # Usage: eval_svm_full.sh WORK_DIR [FEAT_LIST]
 set -x
 set -e
 WORK_DIR=$1
 FEAT_LIST=${2:-"feat5"} # "feat1 feat2 feat3 feat4 feat5"
 # Refuse to run without a work dir: with an empty WORK_DIR the `rm -rf`
 # below would target /svm and the logs would go to /logs.
 if [ -z "$WORK_DIR" ]; then
    echo "ERROR: Missing argument WORK_DIR." >&2
    exit 1
 fi
 TRAIN_SVM_FLAG=true
 TEST_SVM_FLAG=true
 DATA="data/VOCdevkit/VOC2007/SVMLabels"
 # config svm
 costs="1.0,10.0,100.0"
 for feat in $FEAT_LIST; do
    echo "For feature: $feat" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
    # train svm
    if $TRAIN_SVM_FLAG; then
        # NOTE(review): this wipes the whole svm directory on every iteration,
        # so outputs of earlier features in FEAT_LIST are removed -- confirm
        # this is intended.
        rm -rf $WORK_DIR/svm
        mkdir -p $WORK_DIR/svm/voc07_${feat}
        echo "training svm ..."
        python benchmarks/svm_tools/train_svm_kfold_parallel.py \
            --data_file $WORK_DIR/features/voc07_trainval_${feat}.npy \
            --targets_data_file $DATA/train_labels.npy \
            --costs_list $costs \
            --output_path $WORK_DIR/svm/voc07_${feat}
    fi
    # test svm
    if $TEST_SVM_FLAG; then
        echo "testing svm ..."
        python benchmarks/svm_tools/test_svm.py \
            --data_file $WORK_DIR/features/voc07_test_${feat}.npy \
            --json_targets $DATA/test_targets.json \
            --targets_data_file $DATA/test_labels.npy \
            --costs_list $costs \
            --generate_json 1 \
            --output_path $WORK_DIR/svm/voc07_${feat} 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
    fi
 done
--- a/benchmarks/svm_tools/eval_svm_lowshot.sh
+++ b/benchmarks/svm_tools/eval_svm_lowshot.sh
@ -1,64 +0,0 @@
 #!/bin/bash
 # Train, test and aggregate low-shot VOC07 SVMs on extracted features, once
 # per feature name.
 # Usage: eval_svm_lowshot.sh WORK_DIR [FEAT_LIST]
 set -x
 set -e
 WORK_DIR=$1
 MODE="full"
 FEAT_LIST=${2:-"feat5"} # "feat1 feat2 feat3 feat4 feat5"
 # Refuse to run without a work dir: with an empty WORK_DIR the `rm -rf`
 # below would target /svm_lowshot and the logs would go to /logs.
 if [ -z "$WORK_DIR" ]; then
    echo "ERROR: Missing argument WORK_DIR." >&2
    exit 1
 fi
 TRAIN_SVM_LOWSHOT_FLAG=true
 TEST_SVM_LOWSHOT_FLAG=true
 AGGREGATE_FLAG=true
 DATA="data/VOCdevkit/VOC2007/SVMLabels"
 # config svm
 costs="1.0,10.0,100.0"
 # MODE "fast" evaluates only k=96; any other value runs every shot count.
 if [ "$MODE" == "fast" ]; then
    shots="96"
 else
    shots="1 2 4 8 16 32 64 96"
 fi
 for feat in $FEAT_LIST; do
    echo "For feature: $feat" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
    # train lowshot svm
    if $TRAIN_SVM_LOWSHOT_FLAG; then
        # NOTE(review): this wipes the whole svm_lowshot directory on every
        # iteration, so outputs of earlier features in FEAT_LIST are removed
        # -- confirm this is intended.
        rm -rf $WORK_DIR/svm_lowshot
        mkdir -p $WORK_DIR/svm_lowshot/voc07_${feat}
        echo "training svm low-shot ..."
        for s in {1..5}; do
            for k in $shots; do
                echo -e "\ts${s} k${k}"
                python benchmarks/svm_tools/train_svm_low_shot.py \
                    --data_file $WORK_DIR/features/voc07_trainval_${feat}.npy \
                    --targets_data_file $DATA/low_shot/labels/train_targets_sample${s}_k${k}.npy \
                    --costs_list $costs \
                    --output_path $WORK_DIR/svm_lowshot/voc07_${feat}
            done
        done
    fi
    # test lowshot svm
    if $TEST_SVM_LOWSHOT_FLAG; then
        echo "testing svm low-shot ..."
        # "${shots// /,}" turns the space-separated shot list into a
        # comma-separated one.
        python benchmarks/svm_tools/test_svm_low_shot.py \
            --data_file $WORK_DIR/features/voc07_test_${feat}.npy \
            --targets_data_file $DATA/test_labels.npy \
            --json_targets $DATA/test_targets.json \
            --generate_json 1 \
            --costs_list $costs \
            --output_path $WORK_DIR/svm_lowshot/voc07_${feat} \
            --k_values "${shots// /,}" \
            --sample_inds "0,1,2,3,4" \
            --dataset "voc"
    fi
    # aggregate testing results
    if $AGGREGATE_FLAG; then
        echo "aggregating svm low-shot ..."
        python benchmarks/svm_tools/aggregate_low_shot_svm_stats.py \
            --output_path $WORK_DIR/svm_lowshot/voc07_${feat} \
            --k_values "${shots// /,}" \
            --sample_inds "0,1,2,3,4" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
    fi
 done
--- a/benchmarks/svm_tools/svm_helper.py
+++ b/benchmarks/svm_tools/svm_helper.py
@ -1,171 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 Helper module for svm training and testing.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import logging
 import numpy as np
 import os
 import sys
 # create the logger: logging is configured when this module is imported, with
 # INFO-and-above records written to stdout in the format below
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
 logger = logging.getLogger(__name__)
 # Python 2 and Python 3 have different floating point precision. Passing the
 # cost through a fixed 17-decimal string keeps the result backwards compatible.
 def py2_py3_compatible_cost(cost):
    """Return ``cost`` as a string after a fixed-precision round trip.

    The value is formatted with 17 decimal places, parsed back into a float
    and finally converted with ``str``.
    """
    fixed_precision = "{:.17f}".format(cost)
    return str(float(fixed_precision))
 def get_svm_train_output_files(cls, cost, output_path):
    """Build the two output paths for one (class, cost) SVM.

    Returns a tuple ``(model_file, ap_file)``: the ``.pickle`` path and the
    ``.npy`` path, both located under ``output_path`` and both named after
    the class and the cost string from ``py2_py3_compatible_cost``.
    """
    cost_tag = py2_py3_compatible_cost(cost)
    stem = str(cls) + '_cost' + cost_tag
    model_file = os.path.join(output_path, 'cls' + stem + '.pickle')
    ap_file = os.path.join(output_path, 'AP_cls' + stem + '.npy')
    return model_file, ap_file
 def parse_cost_list(costs):
    """Parse a comma separated cost string and append the built-in costs.

    The parsed values come first, followed by ``0.5**n`` for every ``n`` from
    4 up to and including 19.
    """
    parsed = list(map(float, costs.split(",")))
    first_exponent, stop_exponent = 4, 20
    parsed.extend(
        0.5**exponent for exponent in range(first_exponent, stop_exponent))
    return parsed
 def normalize_features(features):
    """Divide every row of ``features`` by its L2 norm plus ``1e-5``."""
    row_norms = np.linalg.norm(features, axis=1)
    # the small offset keeps all-zero rows from dividing by zero
    scale = (row_norms + 1e-5)[:, np.newaxis]
    return features / scale
 def load_input_data(data_file, targets_file):
    """Load the features and the targets from two ``.npy`` files.

    The features are converted to ``float64``. An ``AssertionError`` is raised
    when the two arrays disagree on the size of their first dimension.
    """
    targets = np.load(targets_file, encoding='latin1')
    raw_features = np.load(data_file, encoding='latin1')
    features = np.array(raw_features).astype(np.float64)
    num_features, num_targets = features.shape[0], targets.shape[0]
    assert num_features == num_targets, "Mismatched #images"
    return features, targets
 def calculate_ap(rec, prec):
    """
    Computes the AP under the precision recall curve.

    Both inputs are reshaped to column vectors and padded with sentinel rows.
    The precision is then made non-increasing from left to right and the area
    is accumulated at every position where the recall changes.
    """
    rec_col = rec.reshape(rec.size, 1)
    prec_col = prec.reshape(prec.size, 1)
    zero, one = np.zeros((1, 1)), np.ones((1, 1))
    mrec = np.vstack((zero, rec_col, one))
    mpre = np.vstack((zero, prec_col, zero))
    # sweep backwards so each entry holds the largest precision to its right
    for pos in reversed(range(len(mpre) - 1)):
        if mpre[pos + 1] > mpre[pos]:
            mpre[pos] = mpre[pos + 1]
    change_points = np.where(mrec[1:] != mrec[:-1])[0] + 1
    ap = 0
    for pos in change_points:
        ap = ap + (mrec[pos] - mrec[pos - 1]) * mpre[pos]
    return ap
 def get_precision_recall(targets, preds):
    """
    [P, R, score, ap] = get_precision_recall(targets, preds)
    Input    :
        targets  : number of occurrences of this class in the ith image
        preds    : score for this image
    Output   :
        P, R   : precision and recall
        score  : score which corresponds to the particular precision and recall
        ap     : average precision
    """
    # binarize targets
    targets = np.array(targets > 0, dtype=np.float32)
    # column 0 holds the binary target, column 1 the prediction score
    paired = np.hstack((targets[:, np.newaxis].astype(np.float64),
                        preds[:, np.newaxis].astype(np.float64)))
    # visit the samples from the highest to the lowest score
    order = np.argsort(preds)[::-1]
    score = paired[order, 1]
    tp = paired[order, 0]
    # a sample counts as a false positive exactly when it is not a hit
    fp = np.where(tp >= 1, 0., 1.)
    cum_tp = np.cumsum(tp)
    P = cum_tp / (cum_tp + np.cumsum(fp))
    R = cum_tp / np.sum(targets)
    ap = calculate_ap(R, P)
    return P, R, score, ap
 def get_low_shot_output_file(opts, cls, cost, suffix):
    """Return the pickle path of one low-shot SVM model.

    Low-shot training runs for several independent samples (sample{}) and
    several low-shot amounts (k{}); ``suffix`` carries that sample{}_k{}
    information and becomes part of the file name under ``opts.output_path``.
    """
    cost_tag = py2_py3_compatible_cost(cost)
    stem = str(cls) + '_cost' + cost_tag
    file_name = 'cls' + stem + '_' + suffix + '.pickle'
    return os.path.join(opts.output_path, file_name)
 def get_low_shot_svm_classes(targets, dataset):
    """Return ``(num_classes, cls_list)`` for which SVM testing should be done.

    For 'voc' the classes are the column indices of ``targets``. For 'places'
    they are the distinct values found in the first column of ``targets``.
    Any other dataset is logged and yields ``(None, None)``.
    """
    if dataset == 'voc':
        num_classes = targets.shape[1]
        return num_classes, range(num_classes)
    if dataset == 'places':
        # each image in places has a target cls [0, .... ,204]
        distinct_labels = set(targets[:, 0].tolist())
        return len(distinct_labels), list(distinct_labels)
    logger.info('Dataset not recognized. Abort!')
    return None, None
 def get_cls_feats_labels(cls, features, targets, dataset):
    """Select the features and the -1/1 labels used for class ``cls``.

    'voc': rows whose target for ``cls`` equals -1 (the ignore label) are
    dropped and the remaining 0 labels become -1.
    'places': all features are kept; rows whose first target column equals
    ``cls`` get label 1, every other row gets -1, and the labels are flattened.
    Any other dataset raises ``Exception``.
    """
    if dataset == 'voc':
        column = targets[:, cls]
        # find the indices for positive/negative imgs. Remove the ignore label.
        keep = (column != -1)
        labels = column.astype(dtype=np.int32, copy=True)[keep]
        # label 0 = not present, set it to -1 as svm train target.
        labels[labels == 0] = -1
        return features[keep], labels
    if dataset == 'places':
        first_column = targets[:, 0]
        labels = targets.astype(dtype=np.int32, copy=True)
        # negatives are written first, then the rows of the given class
        labels[first_column != cls] = -1
        labels[np.where(first_column == cls)] = 1
        # finally reshape into the format taken by sklearn svm package.
        return features, labels.reshape(-1)
    raise Exception('args.dataset not recognized')
--- a/benchmarks/svm_tools/test_svm.py
+++ b/benchmarks/svm_tools/test_svm.py
@ -1,174 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 SVM test for image classification.
 Relevant transfer tasks: Image Classification VOC07 and COCO2014.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import argparse
 import json
 import logging
 import numpy as np
 import os
 import pickle
 import six
 import sys
 import svm_helper
 # create the logger: logging is configured when this module is imported, with
 # INFO-and-above records written to stdout in the format below
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
 logger = logging.getLogger(__name__)
 def get_chosen_costs(opts, num_classes):
    """Pick, for every class, the cost with the highest stored training AP.

    The AP of every (class, cost) pair is read from the ``.npy`` file named
    by ``svm_helper.get_svm_train_output_files``. The full AP matrix and the
    chosen costs are saved to ``crossval_ap.npy`` and ``chosen_cost.npy``
    under ``opts.output_path``; the chosen costs are also returned as an array.
    """
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    train_ap_matrix = np.zeros((num_classes, len(costs_list)))
    for cls in range(num_classes):
        for cost_idx, cost in enumerate(costs_list):
            ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path)[1]
            stored_ap = np.load(ap_out_file, encoding='latin1')
            train_ap_matrix[cls][cost_idx] = float(stored_ap[0])
    best_cost_indices = np.argmax(train_ap_matrix, axis=1)
    chosen_cost = [costs_list[idx] for idx in best_cost_indices]
    crossval_file = os.path.join(opts.output_path, 'crossval_ap.npy')
    chosen_file = os.path.join(opts.output_path, 'chosen_cost.npy')
    np.save(crossval_file, np.array(train_ap_matrix))
    np.save(chosen_file, np.array(chosen_cost))
    return np.array(chosen_cost)
 def load_json(file_path):
    """Read a json targets file and return ``(img_ids, cls_names)``.

    The image ids are the top-level keys; the class names are the keys of the
    entry that belongs to the first image id.
    """
    assert os.path.exists(file_path), "{} does not exist".format(file_path)
    with open(file_path, 'r') as handle:
        content = json.load(handle)
    image_keys = list(content.keys())
    return image_keys, list(content[image_keys[0]].keys())
 def test_svm(opts):
    """Evaluate the trained per-class SVMs on the test features.

    For every class the cost returned by ``get_chosen_costs`` selects the
    pickled model to load. The AP of each class is stored in ``test_ap.npy``
    under ``opts.output_path`` and the mean AP is logged. When
    ``opts.generate_json`` is set, the decision values are also written to
    ``json_preds.json``.

    Args:
        opts: parsed command line options; reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list``,
            ``generate_json`` and ``json_targets``.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    json_predictions, img_ids, cls_names = {}, [], []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    num_classes = targets.shape[1]
    # get the chosen cost that maximizes the cross-validation AP per class
    costs_list = get_chosen_costs(opts, num_classes)
    ap_matrix = np.zeros((num_classes, 1))
    for cls in range(num_classes):
        cost = costs_list[cls]
        # Resolve the model path with the helper the training scripts use.
        # Building it from str(cost) renders some costs (e.g. 0.5**18)
        # differently from the name the model was saved under.
        model_file, _ = svm_helper.get_svm_train_output_files(
            cls, cost, opts.output_path)
        # NOTE: unpickling executes code from the file; only load models that
        # were produced by the training scripts.
        with open(model_file, 'rb') as fopen:
            if six.PY2:
                model = pickle.load(fopen)
            else:
                model = pickle.load(fopen, encoding='latin1')
        prediction = model.decision_function(features)
        if opts.generate_json:
            cls_name = cls_names[cls]
            for idx, decision_value in enumerate(prediction):
                img_preds = json_predictions.setdefault(img_ids[idx], {})
                img_preds[cls_name] = decision_value
        cls_labels = targets[:, cls]
        # meaning of labels in VOC/COCO original loaded target files:
        # label -1 = ignore, those images are left out of the evaluation
        # label 0 = not present, set it to -1
        # label 1 = present
        evaluate_data_inds = (targets[:, cls] != -1)
        eval_preds = prediction[evaluate_data_inds]
        eval_cls_labels = cls_labels[evaluate_data_inds]
        eval_cls_labels[np.where(eval_cls_labels == 0)] = -1
        _, _, _, ap = svm_helper.get_precision_recall(
            eval_cls_labels, eval_preds)
        # the AP arrives as a one-element array; store it as a plain scalar
        # because implicit array-to-scalar conversion is deprecated in NumPy
        ap_matrix[cls][0] = np.ravel(ap)[0]
    if opts.generate_json:
        output_file = os.path.join(opts.output_path, 'json_preds.json')
        with open(output_file, 'w') as fp:
            json.dump(json_predictions, fp)
    logger.info('Mean AP: {}'.format(np.mean(ap_matrix, axis=0)))
    np.save(os.path.join(opts.output_path, 'test_ap.npy'), np.array(ap_matrix))
 def main():
    """Parse the command line options and run the SVM test."""
    parser = argparse.ArgumentParser(description='SVM model test')
    # (flag, type, default, help) of every option, in registration order
    option_specs = [
        ('--data_file', str, None,
         "Numpy file containing image features and labels"),
        ('--json_targets', str, None, "Json file containing json targets"),
        ('--targets_data_file', str, None,
         "Numpy file containing image labels"),
        ('--costs_list', str, "0.01,0.1",
         "comma separated string containing list of costs"),
        ('--output_path', str, None,
         "path where trained SVM models are saved"),
        ('--generate_json', int, 0,
         "Whether to generate json files for output"),
    ]
    for flag, arg_type, default, help_text in option_specs:
        parser.add_argument(
            flag, type=arg_type, default=default, help=help_text)
    # called without any argument: show the usage and stop
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    test_svm(parser.parse_args())
 if __name__ == '__main__':
    main()
--- a/benchmarks/svm_tools/test_svm_low_shot.py
+++ b/benchmarks/svm_tools/test_svm_low_shot.py
@ -1,212 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 SVM test for low shot image classification.
 Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
 shot samples.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import argparse
 import json
 import logging
 import numpy as np
 import os
 import pickle
 import six
 import sys
 import svm_helper
 # create the logger: logging is configured when this module is imported, with
 # INFO-and-above records written to stdout in the format below
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
 logger = logging.getLogger(__name__)
 def load_json(file_path):
    """Return the image ids and the class names stored in a json file.

    The image ids are the top-level keys of the file. The class names are
    taken from the keys of the first image's entry.
    """
    assert os.path.exists(file_path), "{} does not exist".format(file_path)
    with open(file_path, 'r') as json_file:
        targets_by_image = json.load(json_file)
    img_ids = list(targets_by_image.keys())
    first_image = img_ids[0]
    cls_names = list(targets_by_image[first_image].keys())
    return img_ids, cls_names
 def save_json_predictions(opts, cost, sample_idx, k_low, features, cls_list,
                           cls_names, img_ids):
    """Write the decision values of every class model to a json file.

    For each class index the low-shot model for ``cost`` is loaded and
    evaluated on ``features``. The values are grouped per image id and saved
    as ``test_sample{}_k{}_json_preds.json`` under ``opts.output_path``.

    Args:
        opts: options object; ``output_path`` is read here and the object is
            forwarded to ``svm_helper.get_low_shot_output_file``.
        cost: cost whose trained models are loaded.
        sample_idx: zero-based sample index (the file names use index + 1).
        k_low: low-shot k value used in the file names.
        features: features handed to ``decision_function``.
        cls_list: classes; only its length is used, models are looked up by
            position.
        cls_names: class name for every class position.
        img_ids: image id for every row of the predictions.
    """
    # the suffix does not depend on the class: compute it once, which also
    # keeps it defined when ``cls_list`` is empty
    suffix = 'sample{}_k{}'.format(sample_idx + 1, k_low)
    json_predictions = {}
    for cls in range(len(cls_list)):
        model_file = svm_helper.get_low_shot_output_file(
            opts, cls, cost, suffix)
        # NOTE: unpickling executes code from the file; only load models that
        # were produced by the training scripts.
        with open(model_file, 'rb') as fopen:
            if six.PY2:
                model = pickle.load(fopen)
            else:
                model = pickle.load(fopen, encoding='latin1')
        prediction = model.decision_function(features)
        cls_name = cls_names[cls]
        for idx, decision_value in enumerate(prediction):
            img_preds = json_predictions.setdefault(img_ids[idx], {})
            img_preds[cls_name] = decision_value
    output_file = os.path.join(opts.output_path,
                               'test_{}_json_preds.json'.format(suffix))
    with open(output_file, 'w') as fp:
        json.dump(json_predictions, fp)
 def test_svm_low_shot(opts):
    """Evaluate the low-shot SVMs for every sample, k-value and cost.

    The svms are tested on the full test set. For a given sample, k-value and
    cost, the trained model of every class is evaluated and the mean AP over
    the classes is kept, giving output = [sample][k_value][cost]. The row for
    each (sample, k) is saved as ``test_ap_sample{}_k{}.npy`` under
    ``opts.output_path``. When ``opts.generate_json`` is set, the predictions
    of the best cost are saved through ``save_json_predictions``.

    Args:
        opts: parsed command line options; reads ``k_values``,
            ``sample_inds``, ``generate_json``, ``json_targets``,
            ``data_file``, ``targets_data_file``, ``costs_list``, ``dataset``
            and ``output_path``.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    k_values = [int(val) for val in opts.k_values.split(",")]
    sample_inds = [int(val) for val in opts.sample_inds.split(",")]
    img_ids, cls_names = [], []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    # parse the cost values the SVMs were trained with
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    # classes for which SVM testing should be done
    num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
        targets, opts.dataset)
    # one (k-value x cost) matrix per sample
    sample_ap_matrices = [
        np.zeros((len(k_values), len(costs_list))) for _ in sample_inds
    ]
    for inds, sample_idx in enumerate(sample_inds):
        for k_idx, k_low in enumerate(k_values):
            suffix = 'sample{}_k{}'.format(sample_idx + 1, k_low)
            for cost_idx, cost in enumerate(costs_list):
                local_cost_ap = np.zeros((num_classes, 1))
                for cls in cls_list:
                    model_file = svm_helper.get_low_shot_output_file(
                        opts, cls, cost, suffix)
                    # NOTE: unpickling executes code from the file; only load
                    # models that were produced by the training scripts.
                    with open(model_file, 'rb') as fopen:
                        if six.PY2:
                            model = pickle.load(fopen)
                        else:
                            model = pickle.load(fopen, encoding='latin1')
                    prediction = model.decision_function(features)
                    eval_preds, eval_cls_labels = (
                        svm_helper.get_cls_feats_labels(
                            cls, prediction, targets, opts.dataset))
                    _, _, _, ap = svm_helper.get_precision_recall(
                        eval_cls_labels, eval_preds)
                    # the AP arrives as a one-element array; store a plain
                    # scalar because implicit array-to-scalar conversion is
                    # deprecated in NumPy
                    local_cost_ap[cls][0] = np.ravel(ap)[0]
                # same mean as before, taken out of its one-element array
                mean_cost_ap = np.mean(local_cost_ap, axis=0)[0]
                sample_ap_matrices[inds][k_idx][cost_idx] = mean_cost_ap
            out_k_sample_file = os.path.join(
                opts.output_path,
                'test_ap_sample{}_k{}.npy'.format(sample_idx + 1, k_low))
            save_data = sample_ap_matrices[inds][k_idx].reshape((1, -1))
            np.save(out_k_sample_file, save_data)
            if opts.generate_json:
                # dump the predictions of the cost with the best mean AP
                argmax_cls = np.argmax(save_data, axis=1)
                chosen_cost = costs_list[argmax_cls[0]]
                save_json_predictions(opts, chosen_cost, sample_idx, k_low,
                                      features, cls_list, cls_names, img_ids)
 def main():
    """Parse the command line options and run the low-shot SVM test.

    Prints the usage and exits with status 1 when no argument is given.
    """
    parser = argparse.ArgumentParser(description='Low shot SVM model test')
    parser.add_argument(
        '--data_file',
        type=str,
        default=None,
        help="Numpy file containing image features and labels")
    parser.add_argument(
        '--targets_data_file',
        type=str,
        default=None,
        help="Numpy file containing image labels")
    # this option is read with json.load, so the help names a json file
    parser.add_argument(
        '--json_targets',
        type=str,
        default=None,
        help="Json file containing json targets")
    parser.add_argument(
        '--generate_json',
        type=int,
        default=0,
        help="Whether to generate json files for output")
    parser.add_argument(
        '--costs_list',
        type=str,
        default=
        "0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1,1.0,10.0,100.0",
        help="comma separated string containing list of costs")
    parser.add_argument(
        '--output_path',
        type=str,
        default=None,
        help="path where trained SVM models are saved")
    parser.add_argument(
        '--k_values',
        type=str,
        default="1,2,4,8,16,32,64,96",
        help="Low-shot k-values for svm testing. Comma separated")
    parser.add_argument(
        '--sample_inds',
        type=str,
        default="0,1,2,3,4",
        help="sample_inds for which to test svm. Comma separated")
    parser.add_argument(
        '--dataset', type=str, default="voc", help='voc | places')
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    opts = parser.parse_args()
    test_svm_low_shot(opts)
 if __name__ == '__main__':
    main()
--- a/benchmarks/svm_tools/train_svm_kfold.py
+++ b/benchmarks/svm_tools/train_svm_kfold.py
@ -1,162 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 SVM training using 3-fold cross-validation.
 Relevant transfer tasks: Image Classification VOC07 and COCO2014.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import argparse
 import logging
 import numpy as np
 import os
 import pickle
 import sys
 from tqdm import tqdm
 from sklearn.svm import LinearSVC
 from sklearn.model_selection import cross_val_score
 import svm_helper
 import time
 # create the logger: logging is configured when this module is imported, with
 # INFO-and-above records written to stdout in the format below
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
 logger = logging.getLogger(__name__)
 def train_svm(opts):
    """Train one linear SVM per (class, cost) pair with 3-fold cross-validation.

    For every pair whose output files do not exist yet, the mean
    cross-validation AP is saved to the ``.npy`` file and the classifier
    fitted on all the data is pickled, both at the paths given by
    ``svm_helper.get_svm_train_output_files``.

    Args:
        opts: parsed command line options; reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list``,
            ``cls_list`` and, when present, ``random_seed``.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(cls) for cls in opts.cls_list.split(",")]
    else:
        cls_list = range(targets.shape[1])
    # The seed option used to be parsed but never reached the classifier.
    # getattr keeps callers working whose options carry no random_seed.
    random_seed = getattr(opts, 'random_seed', None)
    for cls in tqdm(cls_list):
        for cost in costs_list:
            start = time.time()
            out_file, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path)
            if os.path.exists(out_file) and os.path.exists(ap_out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                logger.info('AP file exists: {}'.format(ap_out_file))
            else:
                clf = LinearSVC(
                    C=cost,
                    class_weight={
                        1: 2,
                        -1: 1
                    },
                    intercept_scaling=1.0,
                    verbose=0,
                    penalty='l2',
                    loss='squared_hinge',
                    tol=0.0001,
                    dual=True,
                    max_iter=2000,
                    random_state=random_seed,
                )
                cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
                # meaning of labels in VOC/COCO original loaded target files:
                # label 0 = not present, set it to -1 as svm train target
                # label 1 = present. Make the svm train target labels as -1, 1.
                cls_labels[np.where(cls_labels == 0)] = -1
                ap_scores = cross_val_score(
                    clf,
                    features,
                    cls_labels,
                    cv=3,
                    scoring='average_precision')
                # the saved model is fitted on all the data, not on one fold
                clf.fit(features, cls_labels)
                np.save(ap_out_file, np.array([ap_scores.mean()]))
                with open(out_file, 'wb') as fwrite:
                    pickle.dump(clf, fwrite)
            print("time: {:.4g} s".format(time.time() - start))
 def main():
    """Parse the command line options and run the SVM training."""
    parser = argparse.ArgumentParser(description='SVM model training')
    # (flag, type, default, help) of every option, in registration order
    option_specs = [
        ('--data_file', str, None, "Numpy file containing image features"),
        ('--targets_data_file', str, None,
         "Numpy file containing image labels"),
        ('--output_path', str, None,
         "path where to save the trained SVM models"),
        ('--costs_list', str, "0.01,0.1",
         "comma separated string containing list of costs"),
        ('--random_seed', int, 100,
         "random seed for SVM classifier training"),
        ('--cls_list', str, None,
         "comma separated string list of classes to train"),
    ]
    for flag, arg_type, default, help_text in option_specs:
        parser.add_argument(
            flag, type=arg_type, default=default, help=help_text)
    # called without any argument: show the usage and stop
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    train_svm(parser.parse_args())
 if __name__ == '__main__':
    main()
--- a/benchmarks/svm_tools/train_svm_kfold_parallel.py
+++ b/benchmarks/svm_tools/train_svm_kfold_parallel.py
@ -1,151 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 SVM training using 3-fold cross-validation.
 Relevant transfer tasks: Image Classification VOC07 and COCO2014.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import multiprocessing as mp
 import tqdm
 import argparse
 import logging
 import numpy as np
 import os
 import pickle
 import sys
 from sklearn.svm import LinearSVC
 from sklearn.model_selection import cross_val_score
 import svm_helper
 import pdb
 def task(cls, cost, opts, features, targets):
    """Train and store the SVM for one (class, cost) pair.

    Nothing is done when both output files already exist. Otherwise the mean
    3-fold cross-validation AP is saved to the ``.npy`` file and the
    classifier fitted on all the data is pickled.

    Args:
        cls: column of ``targets`` to train for.
        cost: value passed to ``LinearSVC`` as ``C``.
        opts: options object; reads ``output_path`` and, when present,
            ``random_seed``.
        features: training features.
        targets: label matrix; label 0 becomes -1 before training.

    Returns:
        Always 0.
    """
    out_file, ap_out_file = svm_helper.get_svm_train_output_files(
        cls, cost, opts.output_path)
    if not (os.path.exists(out_file) and os.path.exists(ap_out_file)):
        clf = LinearSVC(
            C=cost,
            class_weight={
                1: 2,
                -1: 1
            },
            intercept_scaling=1.0,
            verbose=0,
            penalty='l2',
            loss='squared_hinge',
            tol=0.0001,
            dual=True,
            max_iter=2000,
            # The seed option used to be parsed but never reached the
            # classifier. getattr keeps callers working whose options carry
            # no random_seed.
            random_state=getattr(opts, 'random_seed', None),
        )
        cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
        cls_labels[np.where(cls_labels == 0)] = -1
        ap_scores = cross_val_score(
            clf, features, cls_labels, cv=3, scoring='average_precision')
        # the saved model is fitted on all the data, not on one fold
        clf.fit(features, cls_labels)
        np.save(ap_out_file, np.array([ap_scores.mean()]))
        with open(out_file, 'wb') as fwrite:
            pickle.dump(clf, fwrite)
    return 0
 def mp_helper(args):
    """Unpack one argument tuple and forward it to ``task``.

    The pool mapping call hands each worker a single object, so the
    arguments of one task travel together as a tuple.
    """
    return task(*args)
 def train_svm(opts):
    """Train the SVMs of all (class, cost) pairs in a process pool.

    Every pair becomes one ``task`` call that is dispatched through
    ``mp_helper``; a progress bar follows the completed tasks.

    Args:
        opts: parsed command line options; reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``cls_list``. The object is also handed to every task.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(cls) for cls in opts.cls_list.split(",")]
    else:
        cls_list = range(targets.shape[1])
    # one argument tuple per (class, cost) pair, classes in the outer loop
    task_args = [(cls, cost, opts, features, targets)
                 for cls in cls_list
                 for cost in costs_list]
    pool = mp.Pool(mp.cpu_count())
    finished = False
    try:
        for _ in tqdm.tqdm(
                pool.imap_unordered(mp_helper, task_args),
                total=len(task_args)):
            pass
        finished = True
    finally:
        # release the worker processes: close after a clean run, terminate
        # when a task failed or the run was interrupted
        if finished:
            pool.close()
        else:
            pool.terminate()
        pool.join()
 def main():
    """Parse the command line options and run the parallel SVM training."""
    parser = argparse.ArgumentParser(description='SVM model training')
    # keyword arguments of every option, in registration order
    option_table = (
        ('--data_file',
         dict(type=str, default=None,
              help="Numpy file containing image features")),
        ('--targets_data_file',
         dict(type=str, default=None,
              help="Numpy file containing image labels")),
        ('--output_path',
         dict(type=str, default=None,
              help="path where to save the trained SVM models")),
        ('--costs_list',
         dict(type=str, default="0.01,0.1",
              help="comma separated string containing list of costs")),
        ('--random_seed',
         dict(type=int, default=100,
              help="random seed for SVM classifier training")),
        ('--cls_list',
         dict(type=str, default=None,
              help="comma separated string list of classes to train")),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    # called without any argument: show the usage and stop
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    train_svm(parser.parse_args())
 if __name__ == '__main__':
    main()
--- a/benchmarks/svm_tools/train_svm_low_shot.py
+++ b/benchmarks/svm_tools/train_svm_low_shot.py
@ -1,144 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 Low Shot SVM training.
 Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
 shot samples.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import argparse
 import logging
 import numpy as np
 import os
 import pickle
 import sys
 from sklearn.svm import LinearSVC
 from tqdm import tqdm
 import svm_helper
 import time
 # module logger: INFO and above go to stdout, tagged with level, file and line
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
 logger = logging.getLogger(__name__)
 logging.basicConfig(stream=sys.stdout, format=FORMAT, level=logging.INFO)
 def train_svm_low_shot(opts):
    """Train one LinearSVC per (class, cost) pair and pickle each model.

    Features and targets are loaded through ``svm_helper``, the features are
    normalized, and for every class returned by
    ``svm_helper.get_low_shot_svm_classes`` and every cost parsed from
    ``opts.costs_list`` a classifier is fitted and written to the path given
    by ``svm_helper.get_low_shot_output_file``. Pairs whose output file
    already exists are skipped.

    Args:
        opts: parsed command-line options. Reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list``,
            ``dataset`` and, when present, ``random_seed``.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    # classes for which SVM training should be done
    _, cls_list = svm_helper.get_low_shot_svm_classes(targets, opts.dataset)
    # The output suffix depends only on the targets file name (last two
    # '_'-separated tokens of its stem), so it is computed once up front.
    suffix = '_'.join(
        opts.targets_data_file.split('/')[-1].split('.')[0].split('_')[-2:])
    # --random_seed used to be parsed but never reached the classifier; pass
    # it on so the dual solver's shuffling is reproducible. ``getattr`` keeps
    # callers that build ``opts`` without this attribute working.
    random_seed = getattr(opts, 'random_seed', None)
    for cls in tqdm(cls_list):
        for cost in costs_list:
            out_file = svm_helper.get_low_shot_output_file(
                opts, cls, cost, suffix)
            if os.path.exists(out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                continue
            clf = LinearSVC(
                C=cost,
                class_weight={
                    1: 2,
                    -1: 1
                },
                intercept_scaling=1.0,
                verbose=0,
                penalty='l2',
                loss='squared_hinge',
                tol=0.0001,
                dual=True,
                max_iter=2000,
                random_state=random_seed,
            )
            train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
                cls, features, targets, opts.dataset)
            clf.fit(train_feats, train_cls_labels)
            with open(out_file, 'wb') as fwrite:
                pickle.dump(clf, fwrite)
 def main():
    """Parse the command-line options and start low-shot SVM training.

    When the script is invoked without any argument the usage text is printed
    and the process exits with status 1; otherwise the parsed options are
    passed to ``train_svm_low_shot``.
    """
    # (flag, type, default, help) for every option, in the order shown by -h
    option_table = (
        ('--data_file', str, None, "Numpy file containing image features"),
        ('--targets_data_file', str, None,
         "Numpy file containing image labels"),
        ('--costs_list', str, "0.01,0.1",
         "comma separated string containing list of costs"),
        ('--output_path', str, None,
         "path where to save the trained SVM models"),
        ('--random_seed', int, 100,
         "random seed for SVM classifier training"),
        ('--dataset', str, "voc", 'voc | places'),
    )
    parser = argparse.ArgumentParser(description='Low-shot SVM model training')
    for flag, value_type, fallback, description in option_table:
        parser.add_argument(
            flag, type=value_type, default=fallback, help=description)
    # a bare invocation is answered with the usage text
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    train_svm_low_shot(parser.parse_args())
 if __name__ == '__main__':
    main()
--- a/benchmarks/svm_tools/train_svm_low_shot_parallel.py
+++ b/benchmarks/svm_tools/train_svm_low_shot_parallel.py
@ -1,145 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 #
 ################################################################################
 """
 Low Shot SVM training.
 Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
 shot samples.
 """
 from __future__ import division
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from __future__ import print_function
 import multiprocessing as mp
 import tqdm
 import argparse
 import logging
 import numpy as np
 import os
 import pickle
 import sys
 from sklearn.svm import LinearSVC
 import svm_helper
 import pdb
 def task(cls, cost, opts, features, targets):
    """Fit and pickle the LinearSVC for one (class, cost) pair.

    Nothing is trained when the output file reported by
    ``svm_helper.get_low_shot_output_file`` already exists.

    Args:
        cls: class identifier forwarded to ``svm_helper``.
        cost: value used as the ``C`` parameter of the classifier.
        opts: parsed command-line options. Reads ``targets_data_file``,
            ``dataset`` and, when present, ``random_seed``.
        features: feature array forwarded to
            ``svm_helper.get_cls_feats_labels``.
        targets: label array forwarded to
            ``svm_helper.get_cls_feats_labels``.

    Returns:
        Always 0; the value only signals completion to the pool iterator.
    """
    # suffix = last two '_'-separated tokens of the targets file stem
    suffix = '_'.join(
        opts.targets_data_file.split('/')[-1].split('.')[0].split('_')[-2:])
    out_file = svm_helper.get_low_shot_output_file(opts, cls, cost, suffix)
    if not os.path.exists(out_file):
        clf = LinearSVC(
            C=cost,
            class_weight={
                1: 2,
                -1: 1
            },
            intercept_scaling=1.0,
            verbose=0,
            penalty='l2',
            loss='squared_hinge',
            tol=0.0001,
            dual=True,
            max_iter=2000,
            # --random_seed used to be parsed but ignored; forwarding it makes
            # the dual solver reproducible. ``getattr`` tolerates ``opts``
            # objects built without the attribute.
            random_state=getattr(opts, 'random_seed', None),
        )
        train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
            cls, features, targets, opts.dataset)
        clf.fit(train_feats, train_cls_labels)
        with open(out_file, 'wb') as fwrite:
            pickle.dump(clf, fwrite)
    return 0
 def mp_helper(args):
    """Unpack one argument tuple and run ``task`` with it.

    The pool's map functions pass a single object per call, so the tuple is
    expanded here into ``task``'s positional arguments.
    """
    outcome = task(*args)
    return outcome
 def train_svm_low_shot(opts):
    """Train all (class, cost) low-shot SVMs in a process pool.

    Features and targets are loaded through ``svm_helper``, the features are
    normalized, and one ``task`` call per (class, cost) pair is dispatched to
    a pool with ``mp.cpu_count()`` workers. Progress is shown with tqdm as
    results arrive.

    Args:
        opts: parsed command-line options. Reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``dataset``; the whole object is also forwarded to ``task``.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    # classes for which SVM training should be done
    _, cls_list = svm_helper.get_low_shot_svm_classes(targets, opts.dataset)
    # one argument tuple per (class, cost) pair, classes in the outer loop
    tasks = [(cls, cost, opts, features, targets)
             for cls in cls_list
             for cost in costs_list]
    num_task = len(tasks)
    pool = mp.Pool(mp.cpu_count())
    try:
        # drain the iterator so every task finishes and worker errors surface
        for _ in tqdm.tqdm(
                pool.imap_unordered(mp_helper, tasks), total=num_task):
            pass
    finally:
        # The pool used to be left running. All results are consumed by now
        # (or an error occurred), so stop the workers and reap them.
        pool.terminate()
        pool.join()
 def main():
    """Parse the command-line options and start parallel low-shot training.

    When the script is invoked without any argument the usage text is printed
    and the process exits with status 1; otherwise the parsed options are
    passed to ``train_svm_low_shot``.
    """
    # (flag, type, default, help) for every option, in the order shown by -h
    option_table = (
        ('--data_file', str, None, "Numpy file containing image features"),
        ('--targets_data_file', str, None,
         "Numpy file containing image labels"),
        ('--costs_list', str, "0.01,0.1",
         "comma separated string containing list of costs"),
        ('--output_path', str, None,
         "path where to save the trained SVM models"),
        ('--random_seed', int, 100,
         "random seed for SVM classifier training"),
        ('--dataset', str, "voc", 'voc | places'),
    )
    parser = argparse.ArgumentParser(description='Low-shot SVM model training')
    for flag, value_type, fallback, description in option_table:
        parser.add_argument(
            flag, type=value_type, default=fallback, help=description)
    # a bare invocation is answered with the usage text
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    train_svm_low_shot(parser.parse_args())
 if __name__ == '__main__':
    main()
--- a/configs/benchmarks/linear_classification/imagenet/r50_last.py
+++ b/configs/benchmarks/linear_classification/imagenet/r50_last.py
@ -1,76 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'with_sobel': False,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'in_channels': 3,
        'out_indices': [4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'BN'},
        'frozen_stages': 4,
    },
    'head': {
        'type': 'ClsHead',
        'with_avg_pool': True,
        'in_channels': 2048,
        'num_classes': 1000,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'ImageNet',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'RandomResizedCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
 ]
 # prefetch
 prefetch = False
 if not prefetch:
    # with prefetch off, tensor conversion and normalization are appended to
    # both pipelines
    train_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
    test_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
 data = {
    'imgs_per_gpu': 32,  # total 32*8=256, 8GPU linear cls
    'workers_per_gpu': 5,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
        'prefetch': prefetch,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
        'prefetch': prefetch,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': True,
        'interval': 1,
        'imgs_per_gpu': 128,
        'workers_per_gpu': 4,
        'prefetch': prefetch,
        'img_norm_cfg': img_norm_cfg,
        'eval_param': {'topk': (1, 5)},
    },
 ]
 # optimizer
 optimizer = {'type': 'SGD', 'lr': 30., 'momentum': 0.9, 'weight_decay': 0.}
 # learning policy
 lr_config = {'policy': 'step', 'step': [60, 80]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 100
--- a/configs/benchmarks/linear_classification/imagenet/r50_last_sobel.py
+++ b/configs/benchmarks/linear_classification/imagenet/r50_last_sobel.py
@ -1,76 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'with_sobel': True,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'in_channels': 2,
        'out_indices': [4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'BN'},
        'frozen_stages': 4,
    },
    'head': {
        'type': 'ClsHead',
        'with_avg_pool': True,
        'in_channels': 2048,
        'num_classes': 1000,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'ImageNet',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'RandomResizedCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
 ]
 # prefetch
 prefetch = False
 if not prefetch:
    # with prefetch off, tensor conversion and normalization are appended to
    # both pipelines
    train_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
    test_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
 data = {
    'imgs_per_gpu': 32,  # total 32*8=256, 8GPU linear cls
    'workers_per_gpu': 5,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
        'prefetch': prefetch,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
        'prefetch': prefetch,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': True,
        'interval': 1,
        'imgs_per_gpu': 128,
        'workers_per_gpu': 4,
        'prefetch': prefetch,
        'img_norm_cfg': img_norm_cfg,
        'eval_param': {'topk': (1, 5)},
    },
 ]
 # optimizer
 optimizer = {'type': 'SGD', 'lr': 30., 'momentum': 0.9, 'weight_decay': 0.}
 # learning policy
 lr_config = {'policy': 'step', 'step': [60, 80]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 100
--- a/configs/benchmarks/linear_classification/imagenet/r50_multihead.py
+++ b/configs/benchmarks/linear_classification/imagenet/r50_multihead.py
@ -1,89 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'with_sobel': False,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'in_channels': 3,
        'out_indices': [0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'BN'},
        'frozen_stages': 4,
    },
    'head': {
        'type': 'MultiClsHead',
        'pool_type': 'specified',
        'in_indices': [0, 1, 2, 3, 4],
        'with_last_layer_unpool': False,
        'backbone': 'resnet50',
        'norm_cfg': {'type': 'SyncBN', 'momentum': 0.1, 'affine': False},
        'num_classes': 1000,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'ImageNet',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'RandomResizedCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
    {
        'type': 'ColorJitter',
        'brightness': 0.4,
        'contrast': 0.4,
        'saturation': 0.4,
        'hue': 0.,
    },
    {'type': 'ToTensor'},
    {'type': 'Lighting'},
    {'type': 'Normalize', **img_norm_cfg},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
    {'type': 'ToTensor'},
    {'type': 'Normalize', **img_norm_cfg},
 ]
 data = {
    'imgs_per_gpu': 32,  # total 32x8=256
    'workers_per_gpu': 5,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': True,
        'interval': 10,
        'imgs_per_gpu': 128,
        'workers_per_gpu': 4,
        'eval_param': {'topk': (1, )},
    },
 ]
 # optimizer
 optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0001,
    'paramwise_options': {'norm_decay_mult': 0.},
    'nesterov': True,
 }
 # learning policy
 lr_config = {'policy': 'step', 'step': [30, 60, 90]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 90
--- a/configs/benchmarks/linear_classification/imagenet/r50_multihead_sobel.py
+++ b/configs/benchmarks/linear_classification/imagenet/r50_multihead_sobel.py
@ -1,89 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'with_sobel': True,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'in_channels': 2,
        'out_indices': [0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'BN'},
        'frozen_stages': 4,
    },
    'head': {
        'type': 'MultiClsHead',
        'pool_type': 'specified',
        'in_indices': [0, 1, 2, 3, 4],
        'with_last_layer_unpool': False,
        'backbone': 'resnet50',
        'norm_cfg': {'type': 'SyncBN', 'momentum': 0.1, 'affine': False},
        'num_classes': 1000,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'ImageNet',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'RandomResizedCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
    {
        'type': 'ColorJitter',
        'brightness': 0.4,
        'contrast': 0.4,
        'saturation': 0.4,
        'hue': 0.,
    },
    {'type': 'ToTensor'},
    {'type': 'Lighting'},
    {'type': 'Normalize', **img_norm_cfg},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
    {'type': 'ToTensor'},
    {'type': 'Normalize', **img_norm_cfg},
 ]
 data = {
    'imgs_per_gpu': 32,  # total 32x8=256
    'workers_per_gpu': 5,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': True,
        'interval': 10,
        'imgs_per_gpu': 128,
        'workers_per_gpu': 4,
        'eval_param': {'topk': (1, )},
    },
 ]
 # optimizer
 optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0001,
    'paramwise_options': {'norm_decay_mult': 0.},
    'nesterov': True,
 }
 # learning policy
 lr_config = {'policy': 'step', 'step': [30, 60, 90]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 90
--- a/configs/benchmarks/linear_classification/places205/r50_multihead.py
+++ b/configs/benchmarks/linear_classification/places205/r50_multihead.py
@ -1,89 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'with_sobel': False,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'in_channels': 3,
        'out_indices': [0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'BN'},
        'frozen_stages': 4,
    },
    'head': {
        'type': 'MultiClsHead',
        'pool_type': 'specified',
        'in_indices': [0, 1, 2, 3, 4],
        'with_last_layer_unpool': False,
        'backbone': 'resnet50',
        'norm_cfg': {'type': 'SyncBN', 'momentum': 0.1, 'affine': False},
        'num_classes': 205,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'Places205',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/places205/meta/train_labeled.txt'
 data_train_root = 'data/places205/train'
 data_test_list = 'data/places205/meta/val_labeled.txt'
 data_test_root = 'data/places205/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 256},
    {'type': 'RandomCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
 ]
 # prefetch
 prefetch = False
 if not prefetch:
    # with prefetch off, tensor conversion and normalization are appended to
    # both pipelines
    train_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
    test_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
 data = {
    'imgs_per_gpu': 32,  # total 32x8=256
    'workers_per_gpu': 4,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
        'prefetch': prefetch,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
        'prefetch': prefetch,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': True,
        'interval': 10,
        'imgs_per_gpu': 32,
        'workers_per_gpu': 4,
        'prefetch': prefetch,
        'img_norm_cfg': img_norm_cfg,
        'eval_param': {'topk': (1, )},
    },
 ]
 # optimizer
 optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0001,
    'paramwise_options': {'norm_decay_mult': 0.},
    'nesterov': True,
 }
 # learning policy
 lr_config = {'policy': 'step', 'step': [7, 14, 21]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 28
--- a/configs/benchmarks/linear_classification/places205/r50_multihead_sobel.py
+++ b/configs/benchmarks/linear_classification/places205/r50_multihead_sobel.py
@ -1,89 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'with_sobel': True,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'in_channels': 2,
        'out_indices': [0, 1, 2, 3, 4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'BN'},
        'frozen_stages': 4,
    },
    'head': {
        'type': 'MultiClsHead',
        'pool_type': 'specified',
        'in_indices': [0, 1, 2, 3, 4],
        'with_last_layer_unpool': False,
        'backbone': 'resnet50',
        'norm_cfg': {'type': 'SyncBN', 'momentum': 0.1, 'affine': False},
        'num_classes': 205,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'Places205',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/places205/meta/train_labeled.txt'
 data_train_root = 'data/places205/train'
 data_test_list = 'data/places205/meta/val_labeled.txt'
 data_test_root = 'data/places205/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 256},
    {'type': 'RandomCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
 ]
 # prefetch
 prefetch = False
 if not prefetch:
    # with prefetch off, tensor conversion and normalization are appended to
    # both pipelines
    train_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
    test_pipeline += [
        {'type': 'ToTensor'},
        {'type': 'Normalize', **img_norm_cfg},
    ]
 data = {
    'imgs_per_gpu': 32,  # total 32x8=256
    'workers_per_gpu': 4,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
        'prefetch': prefetch,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
        'prefetch': prefetch,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': True,
        'interval': 10,
        'imgs_per_gpu': 32,
        'workers_per_gpu': 4,
        'prefetch': prefetch,
        'img_norm_cfg': img_norm_cfg,
        'eval_param': {'topk': (1, )},
    },
 ]
 # optimizer
 optimizer = {
    'type': 'SGD',
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.0001,
    'paramwise_options': {'norm_decay_mult': 0.},
    'nesterov': True,
 }
 # learning policy
 lr_config = {'policy': 'step', 'step': [7, 14, 21]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 28
--- a/configs/benchmarks/semi_classification/imagenet_10percent/base.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/base.py
@ -1,66 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = {
    'type': 'Classification',
    'pretrained': None,
    'backbone': {
        'type': 'ResNet',
        'depth': 50,
        'out_indices': [4],  # 0: conv-1, x: stage-x
        'norm_cfg': {'type': 'SyncBN'},
    },
    'head': {
        'type': 'ClsHead',
        'with_avg_pool': True,
        'in_channels': 2048,
        'num_classes': 1000,
    },
 }
 # dataset settings
 data_source_cfg = {
    'type': 'ImageNet',
    'memcached': True,
    'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled_10percent.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
 }
 train_pipeline = [
    {'type': 'RandomResizedCrop', 'size': 224},
    {'type': 'RandomHorizontalFlip'},
    {'type': 'ToTensor'},
    {'type': 'Normalize', **img_norm_cfg},
 ]
 test_pipeline = [
    {'type': 'Resize', 'size': 256},
    {'type': 'CenterCrop', 'size': 224},
    {'type': 'ToTensor'},
    {'type': 'Normalize', **img_norm_cfg},
 ]
 data = {
    'imgs_per_gpu': 64,  # total 256
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_train_list,
            'root': data_train_root,
            **data_source_cfg,
        },
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_source': {
            'list_file': data_test_list,
            'root': data_test_root,
            **data_source_cfg,
        },
        'pipeline': test_pipeline,
    },
 }
 # additional hooks
 custom_hooks = [
    {
        'type': 'ValidateHook',
        'dataset': data['val'],
        'initial': False,
        'interval': 20,
        'imgs_per_gpu': 32,
        'workers_per_gpu': 2,
        'eval_param': {'topk': (1, 5)},
    },
 ]
 # learning policy
 lr_config = {'policy': 'step', 'step': [12, 16], 'gamma': 0.2}
 checkpoint_config = {'interval': 20}
 # runtime settings
 total_epochs = 20
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_001_head1.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_001_head1.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_001_head10.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_001_head10.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_001_head100.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_001_head100.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head1.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head1.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head10.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head10.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head100.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head100.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head1_sobel.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_01_head1_sobel.py
@ -1,71 +0,0 @@
 _base_ = '../../../base.py'
 # model settings
 model = dict(
    type='Classification',
    pretrained=None,
    with_sobel=True,
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=2,
        out_indices=[4],  # 0: conv-1, x: stage-x
        norm_cfg=dict(type='SyncBN')),
    head=dict(
        type='ClsHead', with_avg_pool=True, in_channels=2048,
        num_classes=1000))
 # dataset settings
 data_source_cfg = dict(
    type='ImageNet',
    memcached=True,
    mclient_path='/mnt/lustre/share/memcached_client')
 data_train_list = 'data/imagenet/meta/train_labeled_10percent.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg),
 ]
 test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg),
 ]
 data = dict(
    imgs_per_gpu=64,  # total 256
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_source=dict(
            list_file=data_train_list, root=data_train_root,
            **data_source_cfg),
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_source=dict(
            list_file=data_test_list, root=data_test_root, **data_source_cfg),
        pipeline=test_pipeline))
 # additional hooks: the validation hook reuses the ``val`` dataset config
 custom_hooks = [
    dict(
        type='ValidateHook',
        dataset=data['val'],
        initial=False,
        interval=20,
        imgs_per_gpu=32,
        workers_per_gpu=2,
        eval_param=dict(topk=(1, 5)))
 ]
 # optimizer
 # The paramwise key is written as a raw string: '\A' is not a valid Python
 # escape, and the non-raw form triggers an invalid-escape warning. The
 # resulting key is unchanged (backslash, 'A', 'head.'). Presumably a regex
 # anchored at the start of the parameter name -- confirm against the
 # optimizer builder.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
 # learning policy
 lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
 checkpoint_config = dict(interval=20)
 # runtime settings
 total_epochs = 20
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_1_head1.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_1_head1.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_1_head10.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_1_head10.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_1_head100.py
+++ b/configs/benchmarks/semi_classification/imagenet_10percent/r50_lr0_1_head100.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001,
                 paramwise_options={r'\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/base.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/base.py
@ -1,72 +0,0 @@
 _base_ = '../../../base.py'
 # model settings: depth-50 ResNet backbone plus a 1000-class ClsHead
 model = {
     'type': 'Classification',
     'pretrained': None,
     'backbone': {
         'type': 'ResNet',
         'depth': 50,
         'out_indices': [4],  # 0: conv-1, x: stage-x
         'norm_cfg': {'type': 'SyncBN'},
     },
     'head': {
         'type': 'ClsHead',
         'with_avg_pool': True,
         'in_channels': 2048,
         'num_classes': 1000,
     },
 }
 # dataset settings: options shared by the train and val data sources
 data_source_cfg = {
     'type': 'ImageNet',
     'memcached': True,
     'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled_1percent.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
     'mean': [0.485, 0.456, 0.406],
     'std': [0.229, 0.224, 0.225],
 }
 # training transforms: random crop/flip, then tensor conversion and
 # normalization with ``img_norm_cfg``
 train_pipeline = [
     {'type': 'RandomResizedCrop', 'size': 224},
     {'type': 'RandomHorizontalFlip'},
     {'type': 'ToTensor'},
     {'type': 'Normalize', **img_norm_cfg},
 ]
 # evaluation transforms: deterministic resize and center crop
 test_pipeline = [
     {'type': 'Resize', 'size': 256},
     {'type': 'CenterCrop', 'size': 224},
     {'type': 'ToTensor'},
     {'type': 'Normalize', **img_norm_cfg},
 ]
 data = {
     'imgs_per_gpu': 64,  # total 256
     'workers_per_gpu': 2,
     'train': {
         'type': dataset_type,
         'data_source': {
             'list_file': data_train_list,
             'root': data_train_root,
             **data_source_cfg,
         },
         'pipeline': train_pipeline,
     },
     'val': {
         'type': dataset_type,
         'data_source': {
             'list_file': data_test_list,
             'root': data_test_root,
             **data_source_cfg,
         },
         'pipeline': test_pipeline,
     },
 }
 # additional hooks: the validation hook reuses the ``data['val']`` dict
 custom_hooks = [
     {
         'type': 'ValidateHook',
         'dataset': data['val'],
         'initial': False,
         'interval': 20,
         'imgs_per_gpu': 32,
         'workers_per_gpu': 2,
         'eval_param': {'topk': (1, 5)},
     },
 ]
 # learning policy
 lr_config = {'policy': 'step', 'step': [12, 16], 'gamma': 0.2}
 checkpoint_config = {'interval': 20}
 log_config = {
     'interval': 10,
     'hooks': [
         {'type': 'TextLoggerHook'},
         {'type': 'TensorboardLoggerHook'},
     ],
 }
 # runtime settings
 total_epochs = 20
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_001_head1.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_001_head1.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_001_head10.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_001_head10.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_001_head100.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_001_head100.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head1.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head1.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head10.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head10.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head100.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head100.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=100)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head1_sobel.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_01_head1_sobel.py
@ -1,77 +0,0 @@
 _base_ = '../../../base.py'
 # model settings: depth-50 ResNet classifier with ``with_sobel=True`` and a
 # 2-channel backbone input
 model = dict(
    type='Classification',
    pretrained=None,
    with_sobel=True,
    backbone=dict(
        type='ResNet',
        depth=50,
        in_channels=2,
        out_indices=[4],  # 0: conv-1, x: stage-x
        norm_cfg=dict(type='SyncBN')),
    head=dict(
        type='ClsHead', with_avg_pool=True, in_channels=2048,
        num_classes=1000))
 # dataset settings: options shared by the train and val data sources
 data_source_cfg = dict(
    type='ImageNet',
    memcached=True,
    mclient_path='/mnt/lustre/share/memcached_client')
 data_train_list = 'data/imagenet/meta/train_labeled_1percent.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 train_pipeline = [
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomHorizontalFlip'),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg),
 ]
 test_pipeline = [
    dict(type='Resize', size=256),
    dict(type='CenterCrop', size=224),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg),
 ]
 data = dict(
    imgs_per_gpu=64,  # total 256
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_source=dict(
            list_file=data_train_list, root=data_train_root,
            **data_source_cfg),
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_source=dict(
            list_file=data_test_list, root=data_test_root, **data_source_cfg),
        pipeline=test_pipeline))
 # additional hooks: the validation hook reuses the ``data['val']`` dict
 custom_hooks = [
    dict(
        type='ValidateHook',
        dataset=data['val'],
        initial=False,
        interval=20,
        imgs_per_gpu=32,
        workers_per_gpu=2,
        eval_param=dict(topk=(1, 5)))
 ]
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
 # learning policy
 lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
 checkpoint_config = dict(interval=20)
 log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
 # runtime settings
 total_epochs = 20
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_1_head1.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_1_head1.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=1)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_1_head10.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_1_head10.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=10)})
--- a/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_1_head100.py
+++ b/configs/benchmarks/semi_classification/imagenet_1percent/r50_lr0_1_head100.py
@ -1,4 +0,0 @@
 _base_ = 'base.py'
 # optimizer
 # The paramwise key is a raw string: '\A' is not a valid string escape, so
 # the plain literal only produced this text by accident and triggers an
 # invalid-escape warning on newer Python versions.
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005,
                 paramwise_options={r'\Ahead.': dict(lr_mult=100)})
--- a/configs/classification/cifar10/r50.py
+++ b/configs/classification/cifar10/r50.py
@ -1,59 +0,0 @@
 _base_ = '../../base.py'
 # model settings: depth-50 ResNet backbone plus a 10-class ClsHead
 model = {
     'type': 'Classification',
     'pretrained': None,
     'backbone': {
         'type': 'ResNet',
         'depth': 50,
         'out_indices': [4],  # 4: stage-4
         'norm_cfg': {'type': 'BN'},
     },
     'head': {
         'type': 'ClsHead',
         'with_avg_pool': True,
         'in_channels': 2048,
         'num_classes': 10,
     },
 }
 # dataset settings: options shared by every split's data source
 data_source_cfg = {'type': 'Cifar10', 'root': 'data/cifar/'}
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
     'mean': [0.4914, 0.4822, 0.4465],
     'std': [0.2023, 0.1994, 0.201],
 }
 # training transforms: padded random crop and flip, then normalization
 train_pipeline = [
     {'type': 'RandomCrop', 'size': 32, 'padding': 4},
     {'type': 'RandomHorizontalFlip'},
     {'type': 'ToTensor'},
     {'type': 'Normalize', **img_norm_cfg},
 ]
 # evaluation transforms: tensor conversion and normalization only
 test_pipeline = [
     {'type': 'ToTensor'},
     {'type': 'Normalize', **img_norm_cfg},
 ]
 data = {
     'imgs_per_gpu': 128,
     'workers_per_gpu': 2,
     'train': {
         'type': dataset_type,
         'data_source': {'split': 'train', **data_source_cfg},
         'pipeline': train_pipeline,
     },
     # val and test both read the 'test' split
     'val': {
         'type': dataset_type,
         'data_source': {'split': 'test', **data_source_cfg},
         'pipeline': test_pipeline,
     },
     'test': {
         'type': dataset_type,
         'data_source': {'split': 'test', **data_source_cfg},
         'pipeline': test_pipeline,
     },
 }
 # additional hooks: the validation hook reuses the ``data['val']`` dict
 custom_hooks = [
     {
         'type': 'ValidateHook',
         'dataset': data['val'],
         'initial': True,
         'interval': 10,
         'imgs_per_gpu': 128,
         'workers_per_gpu': 8,
         'eval_param': {'topk': (1, 5)},
     },
 ]
 # optimizer
 optimizer = {
     'type': 'SGD',
     'lr': 0.1,
     'momentum': 0.9,
     'weight_decay': 0.0005,
 }
 # learning policy
 lr_config = {'policy': 'step', 'step': [150, 250]}
 checkpoint_config = {'interval': 50}
 # runtime settings
 total_epochs = 350
--- a/configs/classification/imagenet/r50.py
+++ b/configs/classification/imagenet/r50.py
@ -1,68 +0,0 @@
 _base_ = '../../base.py'
 # model settings: depth-50 ResNet backbone plus a 1000-class ClsHead
 model = {
     'type': 'Classification',
     'pretrained': None,
     'backbone': {
         'type': 'ResNet',
         'depth': 50,
         'out_indices': [4],  # 0: conv-1, x: stage-x
         'norm_cfg': {'type': 'SyncBN'},
     },
     'head': {
         'type': 'ClsHead',
         'with_avg_pool': True,
         'in_channels': 2048,
         'num_classes': 1000,
     },
 }
 # dataset settings: options shared by the train and val data sources
 data_source_cfg = {
     'type': 'ImageNet',
     'memcached': True,
     'mclient_path': '/mnt/lustre/share/memcached_client',
 }
 data_train_list = 'data/imagenet/meta/train_labeled.txt'
 data_train_root = 'data/imagenet/train'
 data_test_list = 'data/imagenet/meta/val_labeled.txt'
 data_test_root = 'data/imagenet/val'
 dataset_type = 'ClassificationDataset'
 img_norm_cfg = {
     'mean': [0.485, 0.456, 0.406],
     'std': [0.229, 0.224, 0.225],
 }
 # training transforms: random crop/flip, then tensor conversion and
 # normalization with ``img_norm_cfg``
 train_pipeline = [
     {'type': 'RandomResizedCrop', 'size': 224},
     {'type': 'RandomHorizontalFlip'},
     {'type': 'ToTensor'},
     {'type': 'Normalize', **img_norm_cfg},
 ]
 # evaluation transforms: deterministic resize and center crop
 test_pipeline = [
     {'type': 'Resize', 'size': 256},
     {'type': 'CenterCrop', 'size': 224},
     {'type': 'ToTensor'},
     {'type': 'Normalize', **img_norm_cfg},
 ]
 data = {
     'imgs_per_gpu': 32,  # total 256
     'workers_per_gpu': 2,
     'train': {
         'type': dataset_type,
         'data_source': {
             'list_file': data_train_list,
             'root': data_train_root,
             **data_source_cfg,
         },
         'pipeline': train_pipeline,
     },
     'val': {
         'type': dataset_type,
         'data_source': {
             'list_file': data_test_list,
             'root': data_test_root,
             **data_source_cfg,
         },
         'pipeline': test_pipeline,
     },
 }
 # additional hooks: the validation hook reuses the ``data['val']`` dict
 custom_hooks = [
     {
         'type': 'ValidateHook',
         'dataset': data['val'],
         'initial': True,
         'interval': 10,
         'imgs_per_gpu': 32,
         'workers_per_gpu': 2,
         'eval_param': {'topk': (1, 5)},
     },
 ]
 # optimizer
 optimizer = {
     'type': 'SGD',
     'lr': 0.1,
     'momentum': 0.9,
     'weight_decay': 0.0001,
 }
 # learning policy
 lr_config = {'policy': 'step', 'step': [30, 60, 90]}
 checkpoint_config = {'interval': 10}
 # runtime settings
 total_epochs = 90
--- a/mmselfsup/init.py
+++ b/mmselfsup/init.py
@ -1,3 +1,60 @@
-from .version import __version__, short_version
+# Copyright (c) OpenMMLab. All rights reserved.
 import warnings
-__all__ = ['__version__', 'short_version']
+import mmcv
 from packaging.version import parse
 from .version import __version__
 def digit_version(version_str: str, length: int = 4):
    """Turn a version string into a tuple of integers that can be compared.

    The release part is truncated or zero-padded to ``length`` entries and
    two extra entries are appended that order pre-releases before the final
    release (alpha < beta < rc) and post-releases after it.

    Args:
        version_str (str): The version string.
        length (int): The maximum number of version levels. Defaults to 4.

    Returns:
        tuple[int]: The version info in digits (integers).
    """
    parsed = parse(version_str)
    assert parsed.release, f'failed to parse version {version_str}'
    digits = list(parsed.release[:length])
    # multiplying by a non-positive count yields an empty list, so this
    # only pads when the release part is shorter than ``length``
    digits += [0] * (length - len(digits))
    if parsed.is_prerelease:
        stage_rank = {'a': -3, 'b': -2, 'rc': -1}
        pre = parsed.pre
        if not pre:
            # version.pre can be None
            digits += [-4, 0]
        else:
            stage = pre[0]
            if stage in stage_rank:
                rank = stage_rank[stage]
            else:
                warnings.warn(f'unknown prerelease version {stage}, '
                              'version checking may go wrong')
                rank = -4
            digits += [rank, pre[-1]]
    elif parsed.is_postrelease:
        digits += [1, parsed.post]
    else:
        digits += [0, 0]
    return tuple(digits)
 # Inclusive range of MMCV versions this package accepts.
 mmcv_minimum_version = '1.3.16'
 mmcv_maximum_version = '1.5.0'
 mmcv_version = digit_version(mmcv.__version__)
 # Fail at import time when the installed MMCV is outside that range.
 assert (digit_version(mmcv_minimum_version) <= mmcv_version
        <= digit_version(mmcv_maximum_version)), (
    f'MMCV=={mmcv.__version__} is used but incompatible. '
    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.')
 __all__ = ['__version__', 'digit_version']
--- a/mmselfsup/core/init.py
+++ b/mmselfsup/core/init.py
@ -0,0 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .hooks import *  # noqa: F401,F403
 from .optimizer import *  # noqa: F401, F403
--- a/mmselfsup/core/hooks/init.py
+++ b/mmselfsup/core/hooks/init.py
@ -0,0 +1,14 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .byol_hook import BYOLHook
 from .deepcluster_hook import DeepClusterHook
 from .densecl_hook import DenseCLHook
 from .odc_hook import ODCHook
 from .optimizer_hook import DistOptimizerHook, GradAccumFp16OptimizerHook
 from .simsiam_hook import SimSiamHook
 from .swav_hook import SwAVHook
 __all__ = [
    'BYOLHook', 'DeepClusterHook', 'DenseCLHook', 'ODCHook',
    'DistOptimizerHook', 'GradAccumFp16OptimizerHook', 'SimSiamHook',
    'SwAVHook'
 ]
--- a/mmselfsup/core/hooks/byol_hook.py
+++ b/mmselfsup/core/hooks/byol_hook.py
@ -1,21 +1,26 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from math import cos, pi
-from mmcv.runner import Hook
+
 from mmcv.parallel import is_module_wrapper
-
+from mmcv.runner import HOOKS, Hook
 from .registry import HOOKS
-@HOOKS.register_module
+@HOOKS.register_module()
 class BYOLHook(Hook):
    """Hook for BYOL.
    This hook includes momentum adjustment in BYOL following:
-        m = 1 - ( 1- m_0) * (cos(pi * k / K) + 1) / 2,
+
-        k: current step, K: total steps.
+    .. math::
        m = 1 - (1 - m_0) * (cos(pi * k / K) + 1) / 2
    where :math:`k` is the current step, :math:`K` is the total steps.
    Args:
        end_momentum (float): The final momentum coefficient
-            for the target network. Default: 1.
+            for the target network. Defaults to 1.
        update_interval (int, optional): The momentum update interval of the
            weights. Defaults to 1.
    """
    def __init__(self, end_momentum=1., update_interval=1, **kwargs):
@ -24,9 +29,9 @@ class BYOLHook(Hook):
    def before_train_iter(self, runner):
        assert hasattr(runner.model.module, 'momentum'), \
-            "The runner must have attribute \"momentum\" in BYOLHook."
+            "The runner must have attribute \"momentum\" in BYOL."
        assert hasattr(runner.model.module, 'base_momentum'), \
-            "The runner must have attribute \"base_momentum\" in BYOLHook."
+            "The runner must have attribute \"base_momentum\" in BYOL."
        if self.every_n_iters(runner, self.update_interval):
            cur_iter = runner.iter
            max_iter = runner.max_iters
--- a/mmselfsup/core/hooks/deepcluster_hook.py
+++ b/mmselfsup/core/hooks/deepcluster_hook.py
@ -1,32 +1,32 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import numpy as np
 from mmcv.runner import Hook
 import torch
 import torch.distributed as dist
 from mmcv.runner import HOOKS, Hook
 from mmcv.utils import print_log
-from openselfsup.third_party import clustering as _clustering
+from mmselfsup.utils import Extractor
-from openselfsup.utils import print_log
+from mmselfsup.utils import clustering as _clustering
 from .registry import HOOKS
 from .extractor import Extractor
-@HOOKS.register_module
+@HOOKS.register_module()
 class DeepClusterHook(Hook):
    """Hook for DeepCluster.
    This hook includes the global clustering process in DC.
    Args:
        extractor (dict): Config dict for feature extraction.
        clustering (dict): Config dict that specifies the clustering algorithm.
        unif_sampling (bool): Whether to apply uniform sampling.
        reweight (bool): Whether to apply loss re-weighting.
        reweight_pow (float): The power of re-weighting.
-        init_memory (bool): Whether to initialize memory banks for ODC.
+        init_memory (bool): Whether to initialize memory banks used in ODC.
-            Default: False.
+            Defaults to False.
-        initial (bool): Whether to call the hook initially. Default: True.
+        initial (bool): Whether to call the hook initially. Defaults to True.
-        interval (int): Frequency of epochs to call the hook. Default: 1.
+        interval (int): Frequency of epochs to call the hook. Defaults to 1.
-        dist_mode (bool): Use distributed training or not. Default: True.
+        dist_mode (bool): Use distributed training or not. Defaults to True.
-        data_loaders (DataLoader): A PyTorch dataloader. Default: None.
+        data_loaders (DataLoader): A PyTorch dataloader. Defaults to None.
    """
    def __init__(
@ -76,9 +76,8 @@ class DeepClusterHook(Hook):
            clustering_algo.cluster(features, verbose=True)
            assert isinstance(clustering_algo.labels, np.ndarray)
            new_labels = clustering_algo.labels.astype(np.int64)
-            np.save(
+            np.save(f'{runner.work_dir}/cluster_epoch_{runner.epoch}.npy',
-                "{}/cluster_epoch_{}.npy".format(runner.work_dir,
+                    new_labels)
                                                 runner.epoch), new_labels)
            self.evaluate(runner, new_labels)
        else:
            new_labels = np.zeros((len(self.data_loaders[0].dataset), ),
@ -103,7 +102,8 @@ class DeepClusterHook(Hook):
            runner.model.module.set_reweight(new_labels, self.reweight_pow)
        # step 5: randomize classifier
-        runner.model.module.head.init_weights(init_linear='normal')
+        runner.model.module.head._is_init = False
        runner.model.module.head.init_weights()
        if self.dist_mode:
            for p in runner.model.module.head.state_dict().values():
                dist.broadcast(p, 0)
@ -113,12 +113,12 @@ class DeepClusterHook(Hook):
            runner.model.module.memory_bank.init_memory(features, new_labels)
    def evaluate(self, runner, new_labels):
-        hist = np.bincount(new_labels, minlength=self.clustering_cfg.k)
+        histogram = np.bincount(new_labels, minlength=self.clustering_cfg.k)
-        empty_cls = (hist == 0).sum()
+        empty_cls = (histogram == 0).sum()
-        minimal_cls_size, maximal_cls_size = hist.min(), hist.max()
+        minimal_cls_size, maximal_cls_size = histogram.min(), histogram.max()
        if runner.rank == 0:
            print_log(
-                "empty_num: {}\tmin_cluster: {}\tmax_cluster:{}".format(
+                f'empty_num: {empty_cls.item()}\t'
-                    empty_cls.item(), minimal_cls_size.item(),
+                f'min_cluster: {minimal_cls_size.item()}\t'
-                    maximal_cls_size.item()),
+                f'max_cluster:{maximal_cls_size.item()}',
                logger='root')
--- a/mmselfsup/core/hooks/densecl_hook.py
+++ b/mmselfsup/core/hooks/densecl_hook.py
@ -0,0 +1,32 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmcv.runner import HOOKS, Hook
@HOOKS.register_module()
class DenseCLHook(Hook):
    """Warm up ``loss_lambda`` for DenseCL.

    While the runner's iteration counter is below ``start_iters`` the
    model's ``loss_lambda`` is set to 0; from then on it is set back to the
    value captured in :meth:`before_run`.
    Borrowed from the authors' code: `<https://github.com/WXinlong/DenseCL>`_.

    Args:
        start_iters (int, optional): The number of warmup iterations to set
            ``loss_lambda=0``. Defaults to 1000.
    """

    def __init__(self, start_iters=1000, **kwargs):
        # extra keyword arguments are accepted but not used
        self.start_iters = start_iters

    def before_run(self, runner):
        """Remember the model's configured ``loss_lambda``."""
        wrapped = runner.model.module
        assert hasattr(wrapped, 'loss_lambda'), \
            "The runner must have attribute \"loss_lambda\" in DenseCL."
        self.loss_lambda = wrapped.loss_lambda

    def before_train_iter(self, runner):
        """Set ``loss_lambda`` to 0 during warmup, else to the saved value."""
        wrapped = runner.model.module
        assert hasattr(wrapped, 'loss_lambda'), \
            "The runner must have attribute \"loss_lambda\" in DenseCL."
        warmup_done = runner.iter >= self.start_iters
        wrapped.loss_lambda = self.loss_lambda if warmup_done else 0.
--- a/mmselfsup/core/hooks/odc_hook.py
+++ b/mmselfsup/core/hooks/odc_hook.py
@ -1,15 +1,15 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import numpy as np
-
+from mmcv.runner import HOOKS, Hook
-from mmcv.runner import Hook
+from mmcv.utils import print_log
 from openselfsup.utils import print_log
 from .registry import HOOKS
-@HOOKS.register_module
+@HOOKS.register_module()
 class ODCHook(Hook):
    """Hook for ODC.
    This hook includes the online clustering process in ODC.
    Args:
        centroids_update_interval (int): Frequency of iterations
            to update centroids.
@ -18,7 +18,7 @@ class ODCHook(Hook):
        evaluate_interval (int): Frequency of iterations to evaluate clusters.
        reweight (bool): Whether to perform loss re-weighting.
        reweight_pow (float): The power of re-weighting.
-        dist_mode (bool): Use distributed training or not. Default: True.
+        dist_mode (bool): Use distributed training or not. Defaults to True.
    """
    def __init__(self,
@ -28,7 +28,7 @@ class ODCHook(Hook):
                 reweight,
                 reweight_pow,
                 dist_mode=True):
-        assert dist_mode, "non-dist mode is not implemented"
+        assert dist_mode, 'non-dist mode is not implemented'
        self.centroids_update_interval = centroids_update_interval
        self.deal_with_small_clusters_interval = \
            deal_with_small_clusters_interval
@ -61,19 +61,17 @@ class ODCHook(Hook):
            new_labels = runner.model.module.memory_bank.label_bank
            if new_labels.is_cuda:
                new_labels = new_labels.cpu()
-            np.save(
+            np.save(f'{runner.work_dir}/cluster_epoch_{runner.epoch + 1}.npy',
-                "{}/cluster_epoch_{}.npy".format(runner.work_dir,
+                    new_labels.numpy())
                                                 runner.epoch),
                new_labels.numpy())
    def evaluate(self, runner, new_labels):
-        hist = np.bincount(
+        histogram = np.bincount(
            new_labels, minlength=runner.model.module.memory_bank.num_classes)
-        empty_cls = (hist == 0).sum()
+        empty_cls = (histogram == 0).sum()
-        minimal_cls_size, maximal_cls_size = hist.min(), hist.max()
+        minimal_cls_size, maximal_cls_size = histogram.min(), histogram.max()
        if runner.rank == 0:
            print_log(
-                "empty_num: {}\tmin_cluster: {}\tmax_cluster:{}".format(
+                f'empty_num: {empty_cls.item()}\t'
-                    empty_cls.item(), minimal_cls_size.item(),
+                f'min_cluster: {minimal_cls_size.item()}\t'
-                    maximal_cls_size.item()),
+                f'max_cluster:{maximal_cls_size.item()}',
                logger='root')
--- a/mmselfsup/core/hooks/optimizer_hook.py
+++ b/mmselfsup/core/hooks/optimizer_hook.py
@ -0,0 +1,261 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmcv.runner import (HOOKS, Fp16OptimizerHook, OptimizerHook,
                         allreduce_grads)
 from mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version
@HOOKS.register_module()
class DistOptimizerHook(OptimizerHook):
    """Optimizer hook for distributed training.

    This hook can accumulate gradients every n intervals and freeze some
    layers for some iters at the beginning.

    Args:
        update_interval (int, optional): The update interval of the weights,
            set > 1 to accumulate the grad. Defaults to 1.
        grad_clip (dict, optional): Dict to config the value of grad clip.
            E.g., grad_clip = dict(max_norm=10). Defaults to None.
        coalesce (bool, optional): Whether allreduce parameters as a whole.
            Defaults to True.
        bucket_size_mb (int, optional): Size of bucket, the unit is MB.
            Defaults to -1.
        frozen_layers_cfg (dict, optional): Dict to config frozen layers.
            The key-value pair is layer name and its frozen iters. If frozen,
            the layer gradient would be set to None. Defaults to None, which
            is treated as an empty dict (no layer is frozen).
    """

    def __init__(self,
                 update_interval=1,
                 grad_clip=None,
                 coalesce=True,
                 bucket_size_mb=-1,
                 frozen_layers_cfg=None):
        self.grad_clip = grad_clip
        self.coalesce = coalesce
        self.bucket_size_mb = bucket_size_mb
        self.update_interval = update_interval
        # A ``dict()`` default in the signature would be one object shared by
        # every hook instance; build a fresh dict per instance instead.
        self.frozen_layers_cfg = (
            dict() if frozen_layers_cfg is None else frozen_layers_cfg)
        self.initialized = False

    def has_batch_norm(self, module):
        """Return True if ``module`` or any descendant is a ``_BatchNorm``."""
        if isinstance(module, _BatchNorm):
            return True
        for m in module.children():
            if self.has_batch_norm(m):
                return True
        return False

    def _init(self, runner):
        """Warn about lossy setups and split the run into full/remainder iters.

        Called lazily from the first :meth:`after_train_iter`.
        """
        if runner.iter % self.update_interval != 0:
            runner.logger.warning(
                'Resume iter number is not divisible by update_interval in '
                'GradientCumulativeOptimizerHook, which means the gradient of '
                'some iters is lost and the result may be influenced slightly.'
            )
        if self.has_batch_norm(runner.model) and self.update_interval > 1:
            runner.logger.warning(
                'GradientCumulativeOptimizerHook may slightly decrease '
                'performance if the model has BatchNorm layers.')
        residual_iters = runner.max_iters
        # largest multiple of update_interval not exceeding max_iters
        self.divisible_iters = (
            residual_iters // self.update_interval * self.update_interval)
        # size of the trailing, shorter accumulation window (may be 0)
        self.remainder_iters = residual_iters - self.divisible_iters
        self.initialized = True

    def before_run(self, runner):
        """Clear any stale gradients before training starts."""
        runner.optimizer.zero_grad()

    def after_train_iter(self, runner):
        """Back-propagate the scaled loss and step every ``update_interval``.

        The loss is divided by the size of the current accumulation window.
        On an update iteration (or the last iteration) gradients of frozen
        layers are dropped, gradients are optionally clipped, and the
        optimizer steps and is zeroed.
        """
        # In some cases, MMCV's GradientCumulativeOptimizerHook will
        # cause the loss_factor to be zero and we fix this bug in our
        # implementation.
        if not self.initialized:
            self._init(runner)
        if runner.iter < self.divisible_iters:
            loss_factor = self.update_interval
        else:
            # trailing window: fewer than update_interval iters remain
            loss_factor = self.remainder_iters
        runner.outputs['loss'] /= loss_factor
        runner.outputs['loss'].backward()
        if (self.every_n_iters(runner, self.update_interval)
                or self.is_last_iter(runner)):
            # cancel gradient of certain layer for n iters
            # according to frozen_layers_cfg dict
            for layer, iters in self.frozen_layers_cfg.items():
                if runner.iter < iters:
                    for name, p in runner.model.module.named_parameters():
                        # substring match on the parameter name
                        if layer in name:
                            p.grad = None
            if self.grad_clip is not None:
                grad_norm = self.clip_grads(runner.model.parameters())
                if grad_norm is not None:
                    # Add grad norm to the logger
                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                             runner.outputs['num_samples'])
            runner.optimizer.step()
            runner.optimizer.zero_grad()
 if (TORCH_VERSION != 'parrots'
        and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
    @HOOKS.register_module()
    class GradAccumFp16OptimizerHook(Fp16OptimizerHook):
        """Fp16 optimizer hook (using PyTorch's implementation).

        This hook can accumulate gradients every n intervals and freeze some
        layers for some iters at the beginning.
        If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
        to take care of the optimization procedure.

        Args:
            update_interval (int, optional): The update interval of the
                weights, set > 1 to accumulate the grad. Defaults to 1.
            frozen_layers_cfg (dict, optional): Dict to config frozen layers.
                The key-value pair is layer name and its frozen iters. If
                frozen, the layer gradient would be set to None.
                Defaults to None, which is treated as an empty dict (no
                layer is frozen).
            **kwargs: Forwarded unchanged to ``Fp16OptimizerHook``.
        """

        def __init__(self,
                     update_interval=1,
                     frozen_layers_cfg=None,
                     **kwargs):
            super(GradAccumFp16OptimizerHook, self).__init__(**kwargs)
            self.update_interval = update_interval
            # A ``dict()`` default in the signature would be one object
            # shared by every hook instance; build a fresh dict instead.
            self.frozen_layers_cfg = (
                dict() if frozen_layers_cfg is None else frozen_layers_cfg)

        def after_train_iter(self, runner):
            """Back-propagate the scaled loss and step every n iters.

            The loss is divided by ``update_interval`` and back-propagated
            through the loss scaler on every iteration; unscaling, optional
            clipping, the optimizer step and gradient reset happen only on
            iterations selected by ``every_n_iters``.
            """
            runner.outputs['loss'] /= self.update_interval
            self.loss_scaler.scale(runner.outputs['loss']).backward()

            # NOTE(review): there is no last-iteration check here, so
            # gradients accumulated after the final full interval appear to
            # be left unapplied -- confirm whether that is intended.
            if self.every_n_iters(runner, self.update_interval):

                # cancel gradient of certain layer for n iters
                # according to frozen_layers_cfg dict
                for layer, iters in self.frozen_layers_cfg.items():
                    if runner.iter < iters:
                        for name, p in runner.model.module.named_parameters():
                            # substring match on the parameter name
                            if layer in name:
                                p.grad = None

                # copy fp16 grads in the model to fp32 params in the optimizer
                self.loss_scaler.unscale_(runner.optimizer)
                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(runner.model.parameters())
                    if grad_norm is not None:
                        # Add grad norm to the logger
                        runner.log_buffer.update(
                            {'grad_norm': float(grad_norm)},
                            runner.outputs['num_samples'])
                # backward and update scaler
                self.loss_scaler.step(runner.optimizer)
                self.loss_scaler.update(self._scale_update_param)

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()
 else:
    @HOOKS.register_module()
    class GradAccumFp16OptimizerHook(Fp16OptimizerHook):
        """Fp16 optimizer hook with gradient accumulation (mmcv backend).

        The loss is divided by ``update_interval`` and back-propagated on
        every iteration, while the fp32 weights are only updated once every
        ``update_interval`` iterations. Selected layers can additionally have
        their gradients dropped during the first iterations of training.

        Args:
            update_interval (int, optional): Number of iterations over which
                gradients are accumulated before one weight update; set > 1
                to accumulate. Defaults to 1.
            frozen_layers_cfg (dict, optional): Maps a layer name (matched as
                a substring of the parameter name) to the number of
                iterations during which the gradient of matching parameters
                is set to None. Defaults to dict().
        """

        def __init__(self,
                     update_interval=1,
                     frozen_layers_cfg=dict(),
                     **kwargs):
            super(GradAccumFp16OptimizerHook, self).__init__(**kwargs)
            self.frozen_layers_cfg = frozen_layers_cfg
            self.update_interval = update_interval

        def after_train_iter(self, runner):
            """Back-propagate the scaled loss; update on update iterations."""
            runner.outputs['loss'] /= self.update_interval
            # multiply the loss by the current loss scale before backward
            (runner.outputs['loss'] * self.loss_scaler.loss_scale).backward()

            # keep accumulating until an update iteration is reached
            if not self.every_n_iters(runner, self.update_interval):
                return

            # drop the gradient of every layer that is still frozen
            for layer_key, frozen_iters in self.frozen_layers_cfg.items():
                if not runner.iter < frozen_iters:
                    continue
                for param_name, param in \
                        runner.model.module.named_parameters():
                    if layer_key in param_name:
                        param.grad = None

            # copy fp16 grads in the model to fp32 params in the optimizer
            fp32_weights = [
                weight for group in runner.optimizer.param_groups
                for weight in group['params']
            ]
            self.copy_grads_to_fp32(runner.model, fp32_weights)

            # allreduce grads
            if self.distributed:
                allreduce_grads(fp32_weights, self.coalesce,
                                self.bucket_size_mb)

            has_overflow = self.loss_scaler.has_overflow(fp32_weights)
            if has_overflow:
                # the weight update is skipped for this iteration
                runner.logger.warning(
                    'Check overflow, downscale loss scale '
                    f'to {self.loss_scaler.cur_scale}')
            else:
                # scale the gradients back
                for weight in fp32_weights:
                    if weight.grad is not None:
                        weight.grad.div_(self.loss_scaler.loss_scale)
                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(fp32_weights)
                    if grad_norm is not None:
                        # expose the gradient norm to the logger
                        norm_record = {'grad_norm': float(grad_norm)}
                        runner.log_buffer.update(
                            norm_record, runner.outputs['num_samples'])
                # update fp32 params, then mirror them into the fp16 model
                runner.optimizer.step()
                self.copy_params_to_fp16(runner.model, fp32_weights)
            self.loss_scaler.update_scale(has_overflow)

            # keep the scaler state in the runner meta
            scaler_state = self.loss_scaler.state_dict()
            fp16_meta = runner.meta.setdefault('fp16', {})
            fp16_meta['loss_scaler'] = scaler_state

            # start the next accumulation window from zero gradients
            runner.model.zero_grad()
            runner.optimizer.zero_grad()
--- a/mmselfsup/core/hooks/simsiam_hook.py
+++ b/mmselfsup/core/hooks/simsiam_hook.py
@ -0,0 +1,37 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmcv.runner import HOOKS, Hook
@HOOKS.register_module()
class SimSiamHook(Hook):
    """Hook for SimSiam.

    Keeps the learning rate of the predictor fixed by overwriting ``lr`` in
    every optimizer param group that carries a truthy ``fix_lr`` entry.

    Args:
        fix_pred_lr (bool): Whether to fix the lr of the predictor or not.
        lr (float): The value of the fixed lr.
        adjust_by_epoch (bool, optional): Whether to set the lr by epoch or
            by iter. Defaults to True.
    """

    def __init__(self, fix_pred_lr, lr, adjust_by_epoch=True, **kwargs):
        self.adjust_by_epoch = adjust_by_epoch
        self.lr = lr
        self.fix_pred_lr = fix_pred_lr

    def _pin_flagged_lr(self, runner):
        """Write ``self.lr`` into each param group flagged with ``fix_lr``."""
        for group in runner.optimizer.param_groups:
            if group.get('fix_lr'):
                group['lr'] = self.lr

    def before_train_iter(self, runner):
        """Re-apply the fixed lr per iteration when not adjusting by epoch."""
        if not self.adjust_by_epoch and self.fix_pred_lr:
            self._pin_flagged_lr(runner)

    def before_train_epoch(self, runner):
        """Fix lr of predictor."""
        if self.fix_pred_lr:
            self._pin_flagged_lr(runner)
--- a/mmselfsup/core/hooks/swav_hook.py
+++ b/mmselfsup/core/hooks/swav_hook.py
@ -0,0 +1,81 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import os.path as osp
 import torch
 import torch.distributed as dist
 from mmcv.runner import HOOKS, Hook
@HOOKS.register_module()
class SwAVHook(Hook):
    """Hook for SwAV.

    Builds the SwAV queue once ``epoch_queue_starts`` is reached, hands it to
    the model head before every epoch, and stores it in ``runner.work_dir``.
    A queue file already present in that folder is loaded in ``before_run``.

    Args:
        batch_size (int): The batch size per GPU for computing.
        epoch_queue_starts (int, optional): From this epoch on the queue is
            used. Defaults to 15.
        crops_for_assign (list[int], optional): List of crop ids used for
            computing assignments. Defaults to [0, 1].
        feat_dim (int, optional): Feature dimension of the output vector.
            Defaults to 128.
        queue_length (int, optional): Length of the queue (0 for no queue).
            Defaults to 0.
        interval (int, optional): The interval (in epochs) to save the queue.
            Defaults to 1.
    """

    def __init__(self,
                 batch_size,
                 epoch_queue_starts=15,
                 crops_for_assign=[0, 1],
                 feat_dim=128,
                 queue_length=0,
                 interval=1,
                 **kwargs):
        if dist.is_initialized():
            # use the batch size summed over all processes
            batch_size = batch_size * dist.get_world_size()
        self.batch_size = batch_size
        self.queue = None
        self.interval = interval
        self.queue_length = queue_length
        self.feat_dim = feat_dim
        self.crops_for_assign = crops_for_assign
        self.epoch_queue_starts = epoch_queue_starts

    def before_run(self, runner):
        """Resolve the queue file, load it if present, trim the length."""
        queue_file = 'queue.pth'
        if dist.is_initialized():
            # one queue file per rank
            queue_file = 'queue' + str(dist.get_rank()) + '.pth'
        self.queue_path = osp.join(runner.work_dir, queue_file)

        # reuse a previously saved queue
        if osp.isfile(self.queue_path):
            self.queue = torch.load(self.queue_path)['queue']
            runner.model.module.head.queue = self.queue

        # the queue needs to be divisible by the batch size
        self.queue_length -= self.queue_length % self.batch_size

    def before_train_epoch(self, runner):
        """Create the queue when due and pass it to the model head."""
        queue_is_due = (
            self.queue is None and self.queue_length > 0
            and runner.epoch >= self.epoch_queue_starts)
        if queue_is_due:
            queue_shape = (
                len(self.crops_for_assign),
                self.queue_length // runner.world_size,
                self.feat_dim,
            )
            self.queue = torch.zeros(*queue_shape).cuda()

        head = runner.model.module.head
        head.queue = self.queue
        # reset the flag at the start of every epoch
        head.use_queue = False

    def after_train_epoch(self, runner):
        """Fetch the queue back from the head and save it every interval."""
        head_queue = runner.model.module.head.queue
        self.queue = head_queue
        if head_queue is None:
            return
        if self.every_n_epochs(runner, self.interval):
            torch.save({'queue': head_queue}, self.queue_path)
--- a/mmselfsup/core/optimizer/init.py
+++ b/mmselfsup/core/optimizer/init.py
@ -0,0 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .builder import build_optimizer
 from .constructor import DefaultOptimizerConstructor
 from .optimizers import LARS
 __all__ = ['LARS', 'build_optimizer', 'DefaultOptimizerConstructor']
--- a/mmselfsup/core/optimizer/builder.py
+++ b/mmselfsup/core/optimizer/builder.py
@ -0,0 +1,47 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
 from mmcv.runner.optimizer.builder import build_optimizer_constructor
 def build_optimizer(model, optimizer_cfg):
    """Create an optimizer for ``model`` from a config dict.

    The config is deep-copied first, so the caller's dict is left untouched.
    The ``constructor`` and ``paramwise_options`` entries are taken out of
    the copy and the remainder is handed to the optimizer constructor.

    Args:
        model (:obj:`nn.Module`): The model with parameters to be optimized.
        optimizer_cfg (dict): The config dict of the optimizer.
            Positional fields are:
                - type: class name of the optimizer.
                - lr: base learning rate.
            Optional fields are:
                - any arguments of the corresponding optimizer type, e.g.,
                  weight_decay, momentum, etc.
                - constructor: type name of the optimizer constructor,
                  'DefaultOptimizerConstructor' when omitted.
                - paramwise_options: a dict with regular expression as keys
                  to match parameter names and a dict containing options as
                  values. Options include 6 fields: lr, lr_mult, momentum,
                  momentum_mult, weight_decay, weight_decay_mult.

    Returns:
        torch.optim.Optimizer: The initialized optimizer.

    Example:
        >>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> paramwise_options = {
        >>>     '(bn|gn)(\\d+)?.(weight|bias)': dict(weight_decay_mult=0.1),
        >>>     '\\Ahead.': dict(lr_mult=10, momentum=0)}
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        >>>                      weight_decay=0.0001,
        >>>                      paramwise_options=paramwise_options)
        >>> optimizer = build_optimizer(model, optimizer_cfg)
    """
    cfg = copy.deepcopy(optimizer_cfg)
    paramwise_cfg = cfg.pop('paramwise_options', None)
    constructor_type = cfg.pop('constructor', 'DefaultOptimizerConstructor')
    constructor_cfg = {
        'type': constructor_type,
        'optimizer_cfg': cfg,
        'paramwise_cfg': paramwise_cfg,
    }
    return build_optimizer_constructor(constructor_cfg)(model)
--- a/mmselfsup/core/optimizer/constructor.py
+++ b/mmselfsup/core/optimizer/constructor.py
@ -0,0 +1,81 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import re
 import torch.distributed as dist
 from mmcv.runner.optimizer.builder import OPTIMIZER_BUILDERS, OPTIMIZERS
 from mmcv.utils import build_from_cfg, print_log
@OPTIMIZER_BUILDERS.register_module(force=True)
class DefaultOptimizerConstructor:
    """Rewritten default constructor for optimizers.

    By default every parameter shares the same optimizer settings. The
    ``paramwise_cfg`` argument overrides them for selected parameters: each
    key is a regular expression searched in the parameter name, and each
    value is a dict of options applied to the matching parameters. An option
    whose name ends with ``_mult`` (e.g. ``lr_mult``) is multiplied with the
    global option of the same name instead of replacing it.

    Args:
        optimizer_cfg (dict): The config dict of the optimizer.
            Positional fields are
                - `type`: class name of the optimizer.
            Optional fields are
                - any arguments of the corresponding optimizer type, e.g.,
                  lr, weight_decay, momentum, etc.
        paramwise_cfg (dict, optional): Parameter-wise options.
            Defaults to None.

    Raises:
        TypeError: If ``optimizer_cfg`` is not a dict.

    Example 1:
        >>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        >>>                      weight_decay=0.0001)
        >>> paramwise_cfg = {'bias': dict(weight_decay=0.,
        >>>                               lars_exclude=True)}
        >>> optim_builder = DefaultOptimizerConstructor(
        >>>     optimizer_cfg, paramwise_cfg)
        >>> optimizer = optim_builder(model)
    """

    def __init__(self, optimizer_cfg, paramwise_cfg=None):
        if not isinstance(optimizer_cfg, dict):
            # adjacent literals form ONE message; a comma here would hand
            # TypeError two separate arguments
            raise TypeError('optimizer_cfg should be a dict, '
                            f'but got {type(optimizer_cfg)}')
        self.optimizer_cfg = optimizer_cfg
        self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg

    def __call__(self, model):
        """Build the optimizer for ``model``.

        Args:
            model (:obj:`nn.Module`): The model with parameters to be
                optimized. If it has a ``module`` attribute, that inner
                module is used instead.

        Returns:
            The optimizer built from ``optimizer_cfg`` through the
            ``OPTIMIZERS`` registry, with one param group per parameter.
        """
        if hasattr(model, 'module'):
            model = model.module
        optimizer_cfg = self.optimizer_cfg.copy()
        paramwise_options = self.paramwise_cfg

        # __init__ turns None into {}, so this branch only runs when the
        # attribute was reset to None afterwards; kept for compatibility
        if paramwise_options is None:
            optimizer_cfg['params'] = model.parameters()
            return build_from_cfg(optimizer_cfg, OPTIMIZERS)

        assert isinstance(paramwise_options, dict)
        # only one process reports the overrides
        is_main_process = not dist.is_initialized() or dist.get_rank() == 0

        params = []
        for name, param in model.named_parameters():
            param_group = {'params': [param]}
            if not param.requires_grad:
                # parameters without grad keep the global settings
                params.append(param_group)
                continue

            for regexp, options in paramwise_options.items():
                if not re.search(regexp, name):
                    continue
                for key, value in options.items():
                    if key.endswith('_mult'):  # is a multiplier
                        key = key[:-5]
                        assert key in optimizer_cfg, \
                            f'{key} not in optimizer_cfg'
                        value = optimizer_cfg[key] * value
                    param_group[key] = value
                    if is_main_process:
                        print_log(
                            f'paramwise_options -- {name}: {key}={value}')

            # options not overridden above fall back to the global settings
            params.append(param_group)

        optimizer_cfg['params'] = params
        return build_from_cfg(optimizer_cfg, OPTIMIZERS)
--- a/mmselfsup/core/optimizer/optimizers.py
+++ b/mmselfsup/core/optimizer/optimizers.py
@ -1,25 +1,31 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch
 from mmcv.runner.optimizer.builder import OPTIMIZERS
 from torch.optim import *  # noqa: F401,F403
 from torch.optim.optimizer import Optimizer, required
 from torch.optim import *
@OPTIMIZERS.register_module()
 class LARS(Optimizer):
-    r"""Implements layer-wise adaptive rate scaling for SGD.
+    """Implements layer-wise adaptive rate scaling for SGD.
    Args:
-        params (iterable): iterable of parameters to optimize or dicts defining
+        params (iterable): Iterable of parameters to optimize or dicts defining
-            parameter groups
+            parameter groups.
-        lr (float): base learning rate (\gamma_0)
+        lr (float): Base learning rate.
-        momentum (float, optional): momentum factor (default: 0) ("m")
+        momentum (float, optional): Momentum factor. Defaults to 0 ('m')
-        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+        weight_decay (float, optional): Weight decay (L2 penalty).
-            ("\beta")
+            Defaults to 0. ('beta')
-        dampening (float, optional): dampening for momentum (default: 0)
+        dampening (float, optional): Dampening for momentum. Defaults to 0.
-        eta (float, optional): LARS coefficient
+        eta (float, optional): LARS coefficient. Defaults to 0.001.
-        nesterov (bool, optional): enables Nesterov momentum (default: False)
+        nesterov (bool, optional): Enables Nesterov momentum.
            Defaults to False.
        eps (float, optional): A small number to avoid dividing by zero.
            Defaults to 1e-8.
    Based on Algorithm 1 of the following paper by You, Gitman, and Ginsburg.
-    Large Batch Training of Convolutional Networks:
+    `Large Batch Training of Convolutional Networks:
-        https://arxiv.org/abs/1708.03888
+        <https://arxiv.org/abs/1708.03888>`_.
    Example:
        >>> optimizer = LARS(model.parameters(), lr=0.1, momentum=0.9,
@ -33,26 +39,32 @@ class LARS(Optimizer):
                 params,
                 lr=required,
                 momentum=0,
                 dampening=0,
                 weight_decay=0,
                 dampening=0,
                 eta=0.001,
-                 nesterov=False):
+                 nesterov=False,
                 eps=1e-8):
        if lr is not required and lr < 0.0:
-            raise ValueError("Invalid learning rate: {}".format(lr))
+            raise ValueError(f'Invalid learning rate: {lr}')
        if momentum < 0.0:
-            raise ValueError("Invalid momentum value: {}".format(momentum))
+            raise ValueError(f'Invalid momentum value: {momentum}')
        if weight_decay < 0.0:
-            raise ValueError(
+            raise ValueError(f'Invalid weight_decay value: {weight_decay}')
                "Invalid weight_decay value: {}".format(weight_decay))
        if eta < 0.0:
-            raise ValueError("Invalid LARS coefficient value: {}".format(eta))
+            raise ValueError(f'Invalid LARS coefficient value: {eta}')
        defaults = dict(
-            lr=lr, momentum=momentum, dampening=dampening,
+            lr=lr,
-            weight_decay=weight_decay, nesterov=nesterov, eta=eta)
+            momentum=momentum,
            dampening=dampening,
            weight_decay=weight_decay,
            nesterov=nesterov,
            eta=eta)
        if nesterov and (momentum <= 0 or dampening != 0):
-            raise ValueError("Nesterov momentum requires a momentum and zero dampening")
+            raise ValueError(
                'Nesterov momentum requires a momentum and zero dampening')
        self.eps = eps
        super(LARS, self).__init__(params, defaults)
    def __setstate__(self, state):
@ -93,9 +105,12 @@ class LARS(Optimizer):
                else:
                    weight_norm = torch.norm(p).item()
                    grad_norm = torch.norm(d_p).item()
-                    # Compute local learning rate for this layer
+                    if weight_norm != 0 and grad_norm != 0:
-                    local_lr = eta * weight_norm / \
+                        # Compute local learning rate for this layer
-                        (grad_norm + weight_decay * weight_norm)
+                        local_lr = eta * weight_norm / \
                            (grad_norm + weight_decay * weight_norm + self.eps)
                    else:
                        local_lr = 1.
                actual_lr = local_lr * lr
                d_p = d_p.add(p, alpha=weight_decay).mul(actual_lr)
--- a/mmselfsup/hooks/init.py
+++ b/mmselfsup/hooks/init.py
@ -1,8 +0,0 @@
 from .builder import build_hook
 from .byol_hook import BYOLHook
 from .deepcluster_hook import DeepClusterHook
 from .odc_hook import ODCHook
 from .optimizer_hook import DistOptimizerHook
 from .extractor import Extractor
 from .validate_hook import ValidateHook
 from .registry import HOOKS
--- a/mmselfsup/hooks/builder.py
+++ b/mmselfsup/hooks/builder.py
@ -1,7 +0,0 @@
 from openselfsup.utils import build_from_cfg
 from .registry import HOOKS
 def build_hook(cfg, default_args=None):
    """Build a hook from ``cfg`` through the ``HOOKS`` registry.

    Args:
        cfg: Hook config handed to ``build_from_cfg``.
        default_args (optional): Forwarded unchanged to ``build_from_cfg``.
            Defaults to None.

    Returns:
        The object returned by ``build_from_cfg`` for ``cfg``.
    """
    hook = build_from_cfg(cfg, HOOKS, default_args)
    return hook
--- a/mmselfsup/hooks/extractor.py
+++ b/mmselfsup/hooks/extractor.py
@ -1,61 +0,0 @@
 import torch.nn as nn
 from torch.utils.data import Dataset
 from openselfsup.utils import nondist_forward_collect, dist_forward_collect
 class Extractor(object):
    """Feature extractor.

    Runs the runner's model in ``extract`` mode over a dataset and collects
    the flattened neck output of the last backbone feature for each sample.

    Args:
        dataset (Dataset | dict): A PyTorch dataset or dict that indicates
            the dataset.
        imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of
            each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        dist_mode (bool): Use distributed extraction or not. Default: False.
    """

    def __init__(self,
                 dataset,
                 imgs_per_gpu,
                 workers_per_gpu,
                 dist_mode=False):
        from openselfsup import datasets
        if not isinstance(dataset, (Dataset, dict)):
            raise TypeError(
                'dataset must be a Dataset object or a dict, not {}'.format(
                    type(dataset)))
        if isinstance(dataset, Dataset):
            self.dataset = dataset
        else:
            # a dict is treated as a dataset config
            self.dataset = datasets.build_dataset(dataset)
        self.data_loader = datasets.build_dataloader(
            self.dataset,
            imgs_per_gpu,
            workers_per_gpu,
            dist=dist_mode,
            shuffle=False)
        self.dist_mode = dist_mode
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))

    def _forward_func(self, runner, **x):
        """Return the flattened neck output of the last backbone feature."""
        stage_feats = runner.model(mode='extract', **x)
        neck_feat = runner.model.module.neck([stage_feats[-1]])[0]
        # flatten everything after the first dimension
        flat_feat = neck_feat.view(neck_feat.size(0), -1)
        return {'feature': flat_feat.cpu()}

    def __call__(self, runner):
        """Collect the ``feature`` output over the whole dataset."""

        def forward(**x):
            return self._forward_func(runner, **x)

        num_samples = len(self.dataset)
        if not self.dist_mode:
            collected = nondist_forward_collect(forward, self.data_loader,
                                                num_samples)
        else:
            collected = dist_forward_collect(
                forward,
                self.data_loader,
                runner.rank,
                num_samples,
                ret_rank=-1)
        return collected['feature']  # NxD
--- a/mmselfsup/hooks/optimizer_hook.py
+++ b/mmselfsup/hooks/optimizer_hook.py
@ -1,31 +0,0 @@
 from mmcv.runner import OptimizerHook
 try:
    import apex
 except:
    print('apex is not installed')
 class DistOptimizerHook(OptimizerHook):
    """Optimizer hook for distributed training.

    The loss is divided by ``update_interval`` and back-propagated on every
    iteration; the optimizer steps once every ``update_interval`` iterations.

    Args:
        update_interval (int): Iterations between two optimizer steps.
            Default: 1.
        grad_clip (optional): When not None, gradients are clipped through
            ``clip_grads`` before each step. Default: None.
        coalesce (bool): Stored on the hook; not used in this class.
            Default: True.
        bucket_size_mb (int): Stored on the hook; not used in this class.
            Default: -1.
        use_fp16 (bool): Back-propagate through ``apex.amp.scale_loss``.
            Default: False.
    """

    def __init__(self,
                 update_interval=1,
                 grad_clip=None,
                 coalesce=True,
                 bucket_size_mb=-1,
                 use_fp16=False):
        self.use_fp16 = use_fp16
        self.update_interval = update_interval
        self.bucket_size_mb = bucket_size_mb
        self.coalesce = coalesce
        self.grad_clip = grad_clip

    def before_run(self, runner):
        """Start training from zero gradients."""
        runner.optimizer.zero_grad()

    def after_train_iter(self, runner):
        """Back-propagate the loss and step on update iterations."""
        runner.outputs['loss'] /= self.update_interval
        loss = runner.outputs['loss']
        if not self.use_fp16:
            loss.backward()
        else:
            with apex.amp.scale_loss(loss, runner.optimizer) as scaled_loss:
                scaled_loss.backward()

        # keep accumulating until an update iteration is reached
        if not self.every_n_iters(runner, self.update_interval):
            return
        if self.grad_clip is not None:
            self.clip_grads(runner.model.parameters())
        runner.optimizer.step()
        runner.optimizer.zero_grad()
--- a/mmselfsup/hooks/registry.py
+++ b/mmselfsup/hooks/registry.py
@ -1,3 +0,0 @@
 from openselfsup.utils import Registry
 HOOKS = Registry('hook')
--- a/mmselfsup/hooks/validate_hook.py
+++ b/mmselfsup/hooks/validate_hook.py
@ -1,86 +0,0 @@
 from mmcv.runner import Hook
 import torch
 from torch.utils.data import Dataset
 from openselfsup.utils import nondist_forward_collect, dist_forward_collect
 from .registry import HOOKS
@HOOKS.register_module
class ValidateHook(Hook):
    """Validation hook.

    Runs the model in ``test`` mode over a dataset, then lets the dataset
    evaluate every collected output on rank 0 and writes the results to the
    runner's log buffer.

    Args:
        dataset (Dataset | dict): A PyTorch dataset or dict that indicates
            the dataset.
        dist_mode (bool): Use distributed evaluation or not. Default: True.
        initial (bool): Whether to evaluate before the training starts.
            Default: True.
        interval (int): Evaluation interval (by epochs). Default: 1.
        **eval_kwargs: Evaluation arguments fed into the evaluate function of
            the dataset. ``imgs_per_gpu``, ``workers_per_gpu`` and
            ``eval_param`` are read by key; ``prefetch`` and
            ``img_norm_cfg`` are optional.
    """

    def __init__(self,
                 dataset,
                 dist_mode=True,
                 initial=True,
                 interval=1,
                 **eval_kwargs):
        from openselfsup import datasets
        if not isinstance(dataset, (Dataset, dict)):
            raise TypeError(
                'dataset must be a Dataset object or a dict, not {}'.format(
                    type(dataset)))
        if isinstance(dataset, Dataset):
            self.dataset = dataset
        else:
            # a dict is treated as a dataset config
            self.dataset = datasets.build_dataset(dataset)
        self.data_loader = datasets.build_dataloader(
            self.dataset,
            eval_kwargs['imgs_per_gpu'],
            eval_kwargs['workers_per_gpu'],
            dist=dist_mode,
            shuffle=False,
            prefetch=eval_kwargs.get('prefetch', False),
            img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()),
        )
        self.eval_kwargs = eval_kwargs
        self.interval = interval
        self.initial = initial
        self.dist_mode = dist_mode

    def before_run(self, runner):
        """Optionally validate once before training starts."""
        if self.initial:
            self._run_validate(runner)

    def after_train_epoch(self, runner):
        """Validate every ``interval`` epochs."""
        if self.every_n_epochs(runner, self.interval):
            self._run_validate(runner)

    def _run_validate(self, runner):
        """Collect test-mode outputs and evaluate them on rank 0."""
        runner.model.eval()

        def forward_test(**x):
            return runner.model(mode='test', **x)

        num_samples = len(self.dataset)
        if self.dist_mode:
            results = dist_forward_collect(
                forward_test, self.data_loader, runner.rank,
                num_samples)  # dict{key: np.ndarray}
        else:
            results = nondist_forward_collect(forward_test, self.data_loader,
                                              num_samples)
        if runner.rank == 0:
            for keyword, array in results.items():
                self._evaluate(runner, torch.from_numpy(array), keyword)
        # switch back to training mode
        runner.model.train()

    def _evaluate(self, runner, results, keyword):
        """Evaluate one output and publish the metrics to the log buffer."""
        metrics = self.dataset.evaluate(
            results,
            keyword=keyword,
            logger=runner.logger,
            **self.eval_kwargs['eval_param'])
        log_buffer = runner.log_buffer
        for metric_name, metric_val in metrics.items():
            log_buffer.output[metric_name] = metric_val
        log_buffer.ready = True
--- a/mmselfsup/models/init.py
+++ b/mmselfsup/models/init.py
@ -1,15 +0,0 @@
 from .backbones import *  # noqa: F401,F403
 from .builder import (build_backbone, build_model, build_head, build_loss)
 from .byol import BYOL
 from .heads import *
 from .classification import Classification
 from .deepcluster import DeepCluster
 from .odc import ODC
 from .necks import *
 from .npid import NPID
 from .memories import *
 from .moco import MOCO
 from .registry import (BACKBONES, MODELS, NECKS, MEMORIES, HEADS, LOSSES)
 from .rotation_pred import RotationPred
 from .relative_loc import RelativeLoc
 from .simclr import SimCLR
--- a/mmselfsup/models/backbones/init.py
+++ b/mmselfsup/models/backbones/init.py
@ -1 +0,0 @@
 from .resnet import ResNet, make_res_layer
--- a/Show More
+++ b/Show More
		`@ -1,3 +0,0 @@`
			`from openselfsup.utils import Registry`

			`HOOKS = Registry('hook')`