Merge pull request #2 from Jiahao000/xjh_dev

[Refactor] Refactor models, benchmarks, tools and other miscellaneous components.
Jiahao Xie 2021-12-15 18:56:38 +08:00 committed by GitHub
commit 00e51990fb
166 changed files with 893 additions and 9098 deletions

.gitignore

@@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
**/*.pyc
# C extensions
*.so
@@ -103,22 +104,16 @@ venv.bak/
# mypy
.mypy_cache/
openselfsup/version.py
version.py
data
# custom
/data
.vscode
.idea
# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
/mmselfsup/.mim
pretrains
# Pytorch
*.pth
*.swp
source.sh
tensorboard.sh
@@ -126,3 +121,6 @@ tensorboard.sh
replace.sh
benchmarks/detection/datasets
benchmarks/detection/output
# Pytorch
*.pth

@@ -0,0 +1,46 @@
exclude: ^tests/data/
repos:
- repo: https://gitlab.com/pycqa/flake8.git
rev: 3.8.3
hooks:
- id: flake8
- repo: https://github.com/asottile/seed-isort-config
rev: v2.2.0
hooks:
- id: seed-isort-config
- repo: https://github.com/timothycrosley/isort
rev: 4.3.21
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.30.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.1.0
hooks:
- id: trailing-whitespace
- id: check-yaml
exclude: configs/benchmarks/detectron2/Base-RetinaNet.yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: double-quote-string-fixer
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
- id: mixed-line-ending
args: ["--fix=lf"]
- repo: https://github.com/markdownlint/markdownlint
rev: v0.11.0
hooks:
- id: markdownlint
args: ["-r", "~MD002,~MD013,~MD024,~MD029,~MD033,~MD034,~MD036", "-t", "allow_different_nesting"]
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
- repo: https://github.com/myint/docformatter
rev: v1.3.1
hooks:
- id: docformatter
args: ["--in-place", "--wrap-descriptions", "79"]

.readthedocs.yml

@@ -0,0 +1,9 @@
version: 2
formats: all
python:
version: 3.7
install:
- requirements: requirements/docs.txt
- requirements: requirements/readthedocs.txt

@@ -1,12 +0,0 @@
## Transferring to Detection
We follow the evaluation setting in MoCo when transferring to object detection.
### Instruction
1. Install [detectron2](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md).
1. Put dataset under "benchmarks/detection/datasets" directory,
following the [directory structure](https://github.com/facebookresearch/detectron2/tree/master/datasets)
required by detectron2.
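
For illustration only (this sketch is not part of the original instructions, and the source paths are placeholders to adapt), one way to prepare that layout is to symlink an existing local copy into the directory detectron2 expects:

# Hypothetical helper: link local COCO/VOC copies into the layout detectron2
# expects under benchmarks/detection/datasets. Source paths are placeholders.
import os

links = {
    '/path/to/coco': 'benchmarks/detection/datasets/coco',
    '/path/to/VOCdevkit/VOC2007': 'benchmarks/detection/datasets/VOC2007',
    '/path/to/VOCdevkit/VOC2012': 'benchmarks/detection/datasets/VOC2012',
}
for src, dst in links.items():
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    if not os.path.lexists(dst):
        os.symlink(src, dst)  # a symlink avoids copying the datasets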

@@ -1,15 +0,0 @@
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
KEYPOINT_ON: True
ROI_HEADS:
NUM_CLASSES: 1
ROI_BOX_HEAD:
SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss
RPN:
# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
POST_NMS_TOPK_TRAIN: 1500
DATASETS:
TRAIN: ("keypoints_coco_2017_train",)
TEST: ("keypoints_coco_2017_val",)

@@ -1,17 +0,0 @@
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
RPN:
PRE_NMS_TOPK_TEST: 6000
POST_NMS_TOPK_TEST: 1000
ROI_HEADS:
NAME: "Res5ROIHeadsExtraNorm"
BACKBONE:
FREEZE_AT: 0
RESNETS:
NORM: "SyncBN"
TEST:
PRECISE_BN:
ENABLED: True
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.02

@@ -1,42 +0,0 @@
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
ROI_HEADS:
NAME: "StandardROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
ROI_MASK_HEAD:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
DATASETS:
TRAIN: ("coco_2017_train",)
TEST: ("coco_2017_val",)
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.02
STEPS: (60000, 80000)
MAX_ITER: 90000
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

@@ -1,25 +0,0 @@
MODEL:
META_ARCHITECTURE: "RetinaNet"
BACKBONE:
NAME: "build_retinanet_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
FPN:
IN_FEATURES: ["res3", "res4", "res5"]
RETINANET:
IOU_THRESHOLDS: [0.4, 0.5]
IOU_LABELS: [0, -1, 1]
SMOOTH_L1_LOSS_BETA: 0.0
DATASETS:
TRAIN: ("coco_2017_train",)
TEST: ("coco_2017_val",)
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
STEPS: (60000, 80000)
MAX_ITER: 90000
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
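
The ANCHOR_GENERATOR.SIZES entry in the RetinaNet base config above uses a YAML !!python/object/apply:eval tag, so the anchor sizes only materialize when the config is loaded. As a quick worked example (editorial, not part of the config), the expression expands to three scales per feature level:

# Evaluates the same list comprehension as the SIZES entry above.
sizes = [[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]]
for level in sizes:
    print([round(s, 1) for s in level])
# -> [32, 40.3, 50.8], [64, 80.6, 101.6], [128, 161.3, 203.2],
#    [256, 322.5, 406.4], [512, 645.1, 812.7]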

@@ -1,30 +0,0 @@
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
ROI_HEADS:
NUM_CLASSES: 8
BACKBONE:
FREEZE_AT: 0
RESNETS:
DEPTH: 50
NORM: "SyncBN"
FPN:
NORM: "SyncBN"
INPUT:
MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
MIN_SIZE_TRAIN_SAMPLING: "choice"
MIN_SIZE_TEST: 1024
MAX_SIZE_TRAIN: 2048
MAX_SIZE_TEST: 2048
DATASETS:
TRAIN: ("cityscapes_fine_instance_seg_train",)
TEST: ("cityscapes_fine_instance_seg_val",)
SOLVER:
BASE_LR: 0.01
STEPS: (18000,)
MAX_ITER: 24000
IMS_PER_BATCH: 8
TEST:
PRECISE_BN:
ENABLED: True

@@ -1,9 +0,0 @@
_BASE_: "mask_rcnn_R_50_FPN.yaml"
MODEL:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
WEIGHTS: "See Instructions"
RESNETS:
STRIDE_IN_1X1: False
INPUT:
FORMAT: "RGB"

@@ -1,4 +0,0 @@
_BASE_: "coco_R_50_C4_2x.yaml"
SOLVER:
STEPS: (60000, 80000)
MAX_ITER: 90000

@@ -1,4 +0,0 @@
_BASE_: "coco_R_50_C4_2x_moco.yaml"
SOLVER:
STEPS: (60000, 80000)
MAX_ITER: 90000

@@ -1,13 +0,0 @@
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
MASK_ON: True
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
MIN_SIZE_TEST: 800
DATASETS:
TRAIN: ("coco_2017_train",)
TEST: ("coco_2017_val",)
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000

@@ -1,10 +0,0 @@
_BASE_: "coco_R_50_C4_2x.yaml"
MODEL:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
WEIGHTS: "See Instructions"
RESNETS:
STRIDE_IN_1X1: False
INPUT:
MAX_SIZE_TRAIN: 1200
FORMAT: "RGB"

@@ -1,17 +0,0 @@
_BASE_: "Base-RCNN-FPN.yaml"
MODEL:
MASK_ON: True
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
BACKBONE:
FREEZE_AT: 0
RESNETS:
DEPTH: 50
NORM: "SyncBN"
FPN:
NORM: "SyncBN"
TEST:
PRECISE_BN:
ENABLED: True
SOLVER:
STEPS: (60000, 80000)
MAX_ITER: 90000

@@ -1,9 +0,0 @@
_BASE_: "coco_R_50_FPN_1x.yaml"
MODEL:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
WEIGHTS: "See Instructions"
RESNETS:
STRIDE_IN_1X1: False
INPUT:
FORMAT: "RGB"

@@ -1,4 +0,0 @@
_BASE_: "coco_R_50_FPN_1x.yaml"
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000

@@ -1,4 +0,0 @@
_BASE_: "coco_R_50_FPN_1x_moco.yaml"
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000

@@ -1,13 +0,0 @@
_BASE_: "Base-RetinaNet.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
BACKBONE:
FREEZE_AT: 0
RESNETS:
DEPTH: 50
NORM: "SyncBN"
FPN:
NORM: "SyncBN"
TEST:
PRECISE_BN:
ENABLED: True

@@ -1,9 +0,0 @@
_BASE_: "coco_R_50_RetinaNet_1x.yaml"
MODEL:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
WEIGHTS: "See Instructions"
RESNETS:
STRIDE_IN_1X1: False
INPUT:
FORMAT: "RGB"

@@ -1,4 +0,0 @@
_BASE_: "coco_R_50_RetinaNet_1x.yaml"
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000

@@ -1,4 +0,0 @@
_BASE_: "coco_R_50_RetinaNet_1x_moco.yaml"
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000

@@ -1,16 +0,0 @@
_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
BACKBONE:
FREEZE_AT: 0
RESNETS:
DEPTH: 50
NORM: "SyncBN"
FPN:
NORM: "SyncBN"
TEST:
PRECISE_BN:
ENABLED: True
SOLVER:
STEPS: (120000, 160000)
MAX_ITER: 180000

@@ -1,9 +0,0 @@
_BASE_: "keypoint_rcnn_R_50_FPN_2x.yaml"
MODEL:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
WEIGHTS: "See Instructions"
RESNETS:
STRIDE_IN_1X1: False
INPUT:
FORMAT: "RGB"

@@ -1,16 +0,0 @@
_BASE_: "Base-RCNN-C4-BN.yaml"
MODEL:
MASK_ON: False
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_HEADS:
NUM_CLASSES: 20
INPUT:
MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
MIN_SIZE_TEST: 800
DATASETS:
TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
TEST: ('voc_2007_test',)
SOLVER:
STEPS: (18000, 22000)
MAX_ITER: 24000
WARMUP_ITERS: 100

@@ -1,9 +0,0 @@
_BASE_: "pascal_voc_R_50_C4_24k.yaml"
MODEL:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
WEIGHTS: "See Instructions"
RESNETS:
STRIDE_IN_1X1: False
INPUT:
FORMAT: "RGB"

@@ -1,36 +0,0 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import pickle as pkl
import sys
import torch
if __name__ == "__main__":
input = sys.argv[1]
obj = torch.load(input, map_location="cpu")
obj = obj["state_dict"]
newmodel = {}
for k, v in obj.items():
old_k = k
if "layer" not in k:
k = "stem." + k
for t in [1, 2, 3, 4]:
k = k.replace("layer{}".format(t), "res{}".format(t + 1))
for t in [1, 2, 3]:
k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
k = k.replace("downsample.0", "shortcut")
k = k.replace("downsample.1", "shortcut.norm")
print(old_k, "->", k)
newmodel[k] = v.numpy()
res = {
"model": newmodel,
"__author__": "OpenSelfSup",
"matching_heuristics": True
}
assert sys.argv[2].endswith('.pkl')
with open(sys.argv[2], "wb") as f:
pkl.dump(res, f)
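
As an aside (editorial, not from this PR): the script above only renames torchvision/OpenSelfSup-style ResNet keys into the names detectron2 expects. A small self-contained sketch of that mapping, using a dummy state dict and a hypothetical demo_convert helper:

import torch

def demo_convert(state_dict):
    # Apply the same renaming rules as the conversion script above.
    new = {}
    for k, v in state_dict.items():
        if 'layer' not in k:
            k = 'stem.' + k
        for t in [1, 2, 3, 4]:
            k = k.replace('layer{}'.format(t), 'res{}'.format(t + 1))
        for t in [1, 2, 3]:
            k = k.replace('bn{}'.format(t), 'conv{}.norm'.format(t))
        k = k.replace('downsample.0', 'shortcut')
        k = k.replace('downsample.1', 'shortcut.norm')
        new[k] = v.numpy()
    return new

dummy = {'conv1.weight': torch.zeros(1), 'layer1.0.bn1.weight': torch.zeros(1)}
print(list(demo_convert(dummy)))
# -> ['stem.conv1.weight', 'res2.0.conv1.norm.weight']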

@@ -1,6 +0,0 @@
#!/bin/bash
DET_CFG=$1
WEIGHTS=$2
python $(dirname "$0")/train_net.py --config-file $DET_CFG \
--num-gpus 8 MODEL.WEIGHTS $WEIGHTS

@@ -1,77 +0,0 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.evaluation import COCOEvaluator, PascalVOCDetectionEvaluator
from detectron2.layers import get_norm
from detectron2.modeling.roi_heads import ROI_HEADS_REGISTRY, Res5ROIHeads
@ROI_HEADS_REGISTRY.register()
class Res5ROIHeadsExtraNorm(Res5ROIHeads):
"""
As described in the MOCO paper, there is an extra BN layer
following the res5 stage.
"""
def _build_res5_block(self, cfg):
seq, out_channels = super()._build_res5_block(cfg)
norm = cfg.MODEL.RESNETS.NORM
norm = get_norm(norm, out_channels)
seq.add_module("norm", norm)
return seq, out_channels
class Trainer(DefaultTrainer):
@classmethod
def build_evaluator(cls, cfg, dataset_name, output_folder=None):
if output_folder is None:
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
if "coco" in dataset_name:
return COCOEvaluator(dataset_name, cfg, True, output_folder)
else:
assert "voc" in dataset_name
return PascalVOCDetectionEvaluator(dataset_name)
def setup(args):
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
default_setup(cfg, args)
return cfg
def main(args):
cfg = setup(args)
if args.eval_only:
model = Trainer.build_model(cfg)
DetectionCheckpointer(
model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
cfg.MODEL.WEIGHTS, resume=args.resume)
res = Trainer.test(cfg, model)
return res
trainer = Trainer(cfg)
trainer.resume_or_load(resume=args.resume)
return trainer.train()
if __name__ == "__main__":
args = default_argument_parser().parse_args()
print("Command Line Args:", args)
launch(
main,
args.num_gpus,
num_machines=args.num_machines,
machine_rank=args.machine_rank,
dist_url=args.dist_url,
args=(args, ),
)

@@ -1,28 +0,0 @@
#!/bin/bash
set -e
set -x
CFG=$1
EPOCH=$2
FEAT_LIST=$3 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
GPUS=${4:-8}
WORK_DIR=$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/
if [ "$CFG" == "" ] || [ "$EPOCH" == "" ]; then
echo "ERROR: Missing arguments."
exit
fi
if [ ! -f $WORK_DIR/epoch_${EPOCH}.pth ]; then
echo "ERROR: File not exist: $WORK_DIR/epoch_${EPOCH}.pth"
exit
fi
mkdir -p $WORK_DIR/logs
echo "Testing checkpoint: $WORK_DIR/epoch_${EPOCH}.pth" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
bash tools/dist_extract.sh $CFG $GPUS $WORK_DIR --checkpoint $WORK_DIR/epoch_${EPOCH}.pth
bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"

@@ -1,28 +0,0 @@
#!/bin/bash
set -e
set -x
CFG=$1
PRETRAIN=$2 # pretrained model or "random" (random init)
FEAT_LIST=$3 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
GPUS=${4:-8}
WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
if [ "$CFG" == "" ] || [ "$PRETRAIN" == "" ]; then
echo "ERROR: Missing arguments."
exit
fi
if [ ! -f $PRETRAIN ] && [ "$PRETRAIN" != "random" ]; then
echo "ERROR: PRETRAIN should be a file or a string \"random\", got: $PRETRAIN"
exit
fi
mkdir -p $WORK_DIR/logs
echo "Testing pretrain: $PRETRAIN" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
bash tools/dist_extract.sh $CFG $GPUS $WORK_DIR --pretrained $PRETRAIN
bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"
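
A note on the WORK_DIR line above (editorial): ${CFG%.*} piped through sed maps the config path from configs/ to work_dirs/ and drops the file extension, while the rev | cut -d/ -f 1 | rev pipeline takes the basename of the pretrained checkpoint. A rough Python restatement, with made-up example paths:

import os

cfg = 'configs/benchmarks/linear_classification/imagenet/r50_last.py'  # example path only
pretrain = 'work_dirs/selfsup/moco/r50/epoch_200.pth'                  # example path only
work_dir = (os.path.splitext(cfg)[0].replace('configs', 'work_dirs')
            + '/' + os.path.basename(pretrain))
print(work_dir)
# -> work_dirs/benchmarks/linear_classification/imagenet/r50_last/epoch_200.pth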

@@ -1,24 +0,0 @@
#!/usr/bin/env bash
set -e
set -x
CFG=$1 # use cfgs under "configs/benchmarks/linear_classification/"
PRETRAIN=$2
PY_ARGS=${@:3} # --resume_from --deterministic
GPUS=8 # When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
PORT=${PORT:-29500}
if [ "$CFG" == "" ] || [ "$PRETRAIN" == "" ]; then
echo "ERROR: Missing arguments."
exit
fi
WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
# train
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
tools/train.py \
$CFG \
--pretrained $PRETRAIN \
--work_dir $WORK_DIR --seed 0 --launcher="pytorch" ${PY_ARGS}
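
The GPUS comment in the script above encodes a simple constraint: the effective batch size for linear evaluation is GPUS * imgs_per_gpu and should stay at 256 (the linear classification configs use imgs_per_gpu=32 with 8 GPUs). A trivial sanity check with those values, shown only as an illustration:

gpus = 8           # value hard-coded in the launcher above
imgs_per_gpu = 32  # value taken from the linear classification config
assert gpus * imgs_per_gpu == 256, 'adjust imgs_per_gpu when changing GPUS'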

@@ -1,24 +0,0 @@
#!/usr/bin/env bash
set -e
set -x
CFG=$1 # use cfgs under "configs/benchmarks/semi_classification/imagenet_*percent/"
PRETRAIN=$2
PY_ARGS=${@:3}
GPUS=4 # in the standard setting, GPUS=4
PORT=${PORT:-29500}
if [ "$CFG" == "" ] || [ "$PRETRAIN" == "" ]; then
echo "ERROR: Missing arguments."
exit
fi
WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
# train
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
tools/train.py \
$CFG \
--pretrained $PRETRAIN \
--work_dir $WORK_DIR --seed 0 --launcher="pytorch" ${PY_ARGS}

@@ -1,20 +0,0 @@
data_source_cfg = dict(type='ImageList', memcached=False, mclient_path=None)
data_root = "data/VOCdevkit/VOC2007/JPEGImages"
data_all_list = "data/VOCdevkit/VOC2007/Lists/trainvaltest.txt"
split_at = [5011]
split_name = ['voc07_trainval', 'voc07_test']
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
data = dict(
imgs_per_gpu=32,
workers_per_gpu=2,
extract=dict(
type="ExtractDataset",
data_source=dict(
list_file=data_all_list, root=data_root, **data_source_cfg),
pipeline=[
dict(type='Resize', size=256),
dict(type='Resize', size=(224, 224)),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]))

@@ -1,24 +0,0 @@
#!/usr/bin/env bash
set -e
set -x
PARTITION=$1
CFG=$2
EPOCH=$3
FEAT_LIST=$4 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
GPUS=${5:-8}
WORK_DIR=$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/
if [ ! -f $WORK_DIR/epoch_${EPOCH}.pth ]; then
echo "ERROR: File not exist: $WORK_DIR/epoch_${EPOCH}.pth"
exit
fi
mkdir -p $WORK_DIR/logs
echo "Testing checkpoint: $WORK_DIR/epoch_${EPOCH}.pth" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
bash tools/srun_extract.sh $PARTITION $CFG $GPUS $WORK_DIR --checkpoint $WORK_DIR/epoch_${EPOCH}.pth
srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"

@@ -1,24 +0,0 @@
#!/usr/bin/env bash
set -e
set -x
PARTITION=$1
CFG=$2
PRETRAIN=$3 # pretrained model or "random" (random init)
FEAT_LIST=$4 # e.g.: "feat5", "feat4 feat5". If left empty, the default is "feat5"
GPUS=${5:-8}
WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
if [ ! -f $PRETRAIN ] && [ "$PRETRAIN" != "random" ]; then
echo "ERROR: PRETRAIN should be a file or a string \"random\", got: $PRETRAIN"
exit
fi
mkdir -p $WORK_DIR/logs
echo "Testing pretrain: $PRETRAIN" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
bash tools/srun_extract.sh $PARTITION $CFG $GPUS $WORK_DIR --pretrained $PRETRAIN
srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_full.sh $WORK_DIR "$FEAT_LIST"
srun -p $PARTITION bash benchmarks/svm_tools/eval_svm_lowshot.sh $WORK_DIR "$FEAT_LIST"

@@ -1,31 +0,0 @@
#!/usr/bin/env bash
set -e
set -x
PARTITION=$1
CFG=$2
PRETRAIN=$3
PY_ARGS=${@:4}
JOB_NAME="openselfsup"
GPUS=8 # When changing GPUS, please also change imgs_per_gpu in the config file accordingly to ensure the total batch size is 256.
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
# train
GLOG_vmodule=MemcachedClient=-1 \
srun -p ${PARTITION} \
--job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \
${SRUN_ARGS} \
python -u tools/train.py \
$CFG \
--pretrained $PRETRAIN \
--work_dir $WORK_DIR --seed 0 --launcher="slurm" ${PY_ARGS}

@@ -1,31 +0,0 @@
#!/usr/bin/env bash
set -e
set -x
PARTITION=$1
CFG=$2
PRETRAIN=$3
PY_ARGS=${@:4}
JOB_NAME="openselfsup"
GPUS=4 # in the standard setting, GPUS=4
GPUS_PER_NODE=${GPUS_PER_NODE:-4}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
WORK_DIR="$(echo ${CFG%.*} | sed -e "s/configs/work_dirs/g")/$(echo $PRETRAIN | rev | cut -d/ -f 1 | rev)"
# train
GLOG_vmodule=MemcachedClient=-1 \
srun -p ${PARTITION} \
--job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \
${SRUN_ARGS} \
python -u tools/train.py \
$CFG \
--pretrained $PRETRAIN \
--work_dir $WORK_DIR --seed 0 --launcher="slurm" ${PY_ARGS}

@@ -1,127 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
Aggregate the stats over various independent samples for low-shot svm training.
Stats computed: mean, max, min, std
Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
shot samples.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import argparse
import logging
import numpy as np
import os
import sys
# create the logger
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def save_stats(output_dir, stat, output):
out_file = os.path.join(output_dir, 'test_ap_{}.npy'.format(stat))
#logger.info('Saving {} to: {} {}'.format(stat, out_file, output.shape))
np.save(out_file, output)
def aggregate_stats(opts):
k_values = [int(val) for val in opts.k_values.split(",")]
sample_inds = [int(val) for val in opts.sample_inds.split(",")]
#logger.info(
# 'Aggregating stats for k-values: {} and sample_inds: {}'.format(
# k_values, sample_inds))
output_mean, output_max, output_min, output_std = [], [], [], []
for k_idx in range(len(k_values)):
k_low = k_values[k_idx]
k_val_output = []
for inds in range(len(sample_inds)):
sample_idx = sample_inds[inds]
file_name = 'test_ap_sample{}_k{}.npy'.format(
sample_idx + 1, k_low)
filepath = os.path.join(opts.output_path, file_name)
if os.path.exists(filepath):
k_val_output.append(np.load(filepath, encoding='latin1'))
else:
logger.info('file does not exist: {}'.format(filepath))
k_val_output = np.concatenate(k_val_output, axis=0)
k_low_max = np.max(
k_val_output, axis=0).reshape(-1, k_val_output.shape[1])
k_low_min = np.min(
k_val_output, axis=0).reshape(-1, k_val_output.shape[1])
k_low_mean = np.mean(
k_val_output, axis=0).reshape(-1, k_val_output.shape[1])
k_low_std = np.std(
k_val_output, axis=0).reshape(-1, k_val_output.shape[1])
output_mean.append(k_low_mean)
output_min.append(k_low_min)
output_max.append(k_low_max)
output_std.append(k_low_std)
output_mean = np.concatenate(output_mean, axis=0)
output_min = np.concatenate(output_min, axis=0)
output_max = np.concatenate(output_max, axis=0)
output_std = np.concatenate(output_std, axis=0)
save_stats(opts.output_path, 'mean', output_mean)
save_stats(opts.output_path, 'min', output_min)
save_stats(opts.output_path, 'max', output_max)
save_stats(opts.output_path, 'std', output_std)
argmax_cls = np.argmax(output_mean, axis=1)
argmax_mean, argmax_min, argmax_max, argmax_std = [], [], [], []
for idx in range(len(argmax_cls)):
argmax_mean.append(100.0 * output_mean[idx, argmax_cls[idx]])
argmax_min.append(100.0 * output_min[idx, argmax_cls[idx]])
argmax_max.append(100.0 * output_max[idx, argmax_cls[idx]])
argmax_std.append(100.0 * output_std[idx, argmax_cls[idx]])
for idx in range(len(argmax_max)):
logger.info('mean/min/max/std: {} / {} / {} / {}'.format(
round(argmax_mean[idx], 2),
round(argmax_min[idx], 2),
round(argmax_max[idx], 2),
round(argmax_std[idx], 2),
))
#logger.info('All done!!')
def main():
parser = argparse.ArgumentParser(description='Low shot SVM model test')
parser.add_argument(
'--output_path',
type=str,
default=None,
help="Numpy file containing test AP result files")
parser.add_argument(
'--k_values',
type=str,
default=None,
help="Low-shot k-values for svm testing. Comma separated")
parser.add_argument(
'--sample_inds',
type=str,
default=None,
help="sample_inds for which to test svm. Comma separated")
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
#logger.info(opts)
aggregate_stats(opts)
if __name__ == '__main__':
main()

@@ -1,40 +0,0 @@
#!/bin/bash
set -x
set -e
WORK_DIR=$1
FEAT_LIST=${2:-"feat5"} # "feat1 feat2 feat3 feat4 feat5"
TRAIN_SVM_FLAG=true
TEST_SVM_FLAG=true
DATA="data/VOCdevkit/VOC2007/SVMLabels"
# config svm
costs="1.0,10.0,100.0"
for feat in $FEAT_LIST; do
echo "For feature: $feat" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
# train svm
if $TRAIN_SVM_FLAG; then
rm -rf $WORK_DIR/svm
mkdir -p $WORK_DIR/svm/voc07_${feat}
echo "training svm ..."
python benchmarks/svm_tools/train_svm_kfold_parallel.py \
--data_file $WORK_DIR/features/voc07_trainval_${feat}.npy \
--targets_data_file $DATA/train_labels.npy \
--costs_list $costs \
--output_path $WORK_DIR/svm/voc07_${feat}
fi
# test svm
if $TEST_SVM_FLAG; then
echo "testing svm ..."
python benchmarks/svm_tools/test_svm.py \
--data_file $WORK_DIR/features/voc07_test_${feat}.npy \
--json_targets $DATA/test_targets.json \
--targets_data_file $DATA/test_labels.npy \
--costs_list $costs \
--generate_json 1 \
--output_path $WORK_DIR/svm/voc07_${feat} 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
fi
done

@@ -1,64 +0,0 @@
#!/bin/bash
set -x
set -e
WORK_DIR=$1
MODE="full"
FEAT_LIST=${2:-"feat5"} # "feat1 feat2 feat3 feat4 feat5"
TRAIN_SVM_LOWSHOT_FLAG=true
TEST_SVM_LOWSHOT_FLAG=true
AGGREGATE_FLAG=true
DATA="data/VOCdevkit/VOC2007/SVMLabels"
# config svm
costs="1.0,10.0,100.0"
if [ "$MODE" == "fast" ]; then
shots="96"
else
shots="1 2 4 8 16 32 64 96"
fi
for feat in $FEAT_LIST; do
echo "For feature: $feat" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
# train lowshot svm
if $TRAIN_SVM_LOWSHOT_FLAG; then
rm -rf $WORK_DIR/svm_lowshot
mkdir -p $WORK_DIR/svm_lowshot/voc07_${feat}
echo "training svm low-shot ..."
for s in {1..5}; do
for k in $shots; do
echo -e "\ts${s} k${k}"
python benchmarks/svm_tools/train_svm_low_shot.py \
--data_file $WORK_DIR/features/voc07_trainval_${feat}.npy \
--targets_data_file $DATA/low_shot/labels/train_targets_sample${s}_k${k}.npy \
--costs_list $costs \
--output_path $WORK_DIR/svm_lowshot/voc07_${feat}
done
done
fi
# test lowshot svm
if $TEST_SVM_LOWSHOT_FLAG; then
echo "testing svm low-shot ..."
python benchmarks/svm_tools/test_svm_low_shot.py \
--data_file $WORK_DIR/features/voc07_test_${feat}.npy \
--targets_data_file $DATA/test_labels.npy \
--json_targets $DATA/test_targets.json \
--generate_json 1 \
--costs_list $costs \
--output_path $WORK_DIR/svm_lowshot/voc07_${feat} \
--k_values "${shots// /,}" \
--sample_inds "0,1,2,3,4" \
--dataset "voc"
fi
# aggregate testing results
if $AGGREGATE_FLAG; then
echo "aggregating svm low-shot ..."
python benchmarks/svm_tools/aggregate_low_shot_svm_stats.py \
--output_path $WORK_DIR/svm_lowshot/voc07_${feat} \
--k_values "${shots// /,}" \
--sample_inds "0,1,2,3,4" 2>&1 | tee -a $WORK_DIR/logs/eval_svm.log
fi
done
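
One bash detail worth noting (editorial): the ${shots// /,} expansion used for --k_values replaces every space in $shots with a comma, so the full-mode run passes a comma-separated list. The Python equivalent of that substitution:

print('1 2 4 8 16 32 64 96'.replace(' ', ','))  # -> 1,2,4,8,16,32,64,96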

@@ -1,171 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
Helper module for svm training and testing.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import logging
import numpy as np
import os
import sys
# create the logger
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
# Python 2 and Python 3 have different floating point precision. The following
# trick helps keep backwards compatibility.
def py2_py3_compatible_cost(cost):
return str(float("{:.17f}".format(cost)))
def get_svm_train_output_files(cls, cost, output_path):
cls_cost = str(cls) + '_cost' + py2_py3_compatible_cost(cost)
out_file = os.path.join(output_path, 'cls' + cls_cost + '.pickle')
ap_matrix_out_file = os.path.join(output_path,
'AP_cls' + cls_cost + '.npy')
return out_file, ap_matrix_out_file
def parse_cost_list(costs):
costs_list = [float(cost) for cost in costs.split(",")]
start_num, end_num = 4, 20
for num in range(start_num, end_num):
costs_list.append(0.5**num)
return costs_list
def normalize_features(features):
feats_norm = np.linalg.norm(features, axis=1)
features = features / (feats_norm + 1e-5)[:, np.newaxis]
return features
def load_input_data(data_file, targets_file):
# load the features and the targets
#logger.info('loading features and targets...')
targets = np.load(targets_file, encoding='latin1')
features = np.array(np.load(data_file,
encoding='latin1')).astype(np.float64)
assert features.shape[0] == targets.shape[0], "Mismatched #images"
#logger.info('Loaded features: {} and targets: {}'.format(
# features.shape, targets.shape))
return features, targets
def calculate_ap(rec, prec):
"""
Computes the AP under the precision recall curve.
"""
rec, prec = rec.reshape(rec.size, 1), prec.reshape(prec.size, 1)
z, o = np.zeros((1, 1)), np.ones((1, 1))
mrec, mpre = np.vstack((z, rec, o)), np.vstack((z, prec, z))
for i in range(len(mpre) - 2, -1, -1):
mpre[i] = max(mpre[i], mpre[i + 1])
indices = np.where(mrec[1:] != mrec[0:-1])[0] + 1
ap = 0
for i in indices:
ap = ap + (mrec[i] - mrec[i - 1]) * mpre[i]
return ap
def get_precision_recall(targets, preds):
"""
[P, R, score, ap] = get_precision_recall(targets, preds)
Input :
targets : number of occurrences of this class in the ith image
preds : score for this image
Output :
P, R : precision and recall
score : score which corresponds to the particular precision and recall
ap : average precision
"""
# binarize targets
targets = np.array(targets > 0, dtype=np.float32)
tog = np.hstack((targets[:, np.newaxis].astype(np.float64),
preds[:, np.newaxis].astype(np.float64)))
ind = np.argsort(preds)
ind = ind[::-1]
score = np.array([tog[i, 1] for i in ind])
sortcounts = np.array([tog[i, 0] for i in ind])
tp = sortcounts
fp = sortcounts.copy()
for i in range(sortcounts.shape[0]):
if sortcounts[i] >= 1:
fp[i] = 0.
elif sortcounts[i] < 1:
fp[i] = 1.
P = np.cumsum(tp) / (np.cumsum(tp) + np.cumsum(fp))
numinst = np.sum(targets)
R = np.cumsum(tp) / numinst
ap = calculate_ap(R, P)
return P, R, score, ap
def get_low_shot_output_file(opts, cls, cost, suffix):
# in case of low-shot training, we train for 5 independent samples
# (sample{}) and vary low-shot amount (k{}). The input data should have
# sample{}_k{} information that we extract in suffix below.
# logger.info('Suffix: {}'.format(suffix))
cls_cost = str(cls) + '_cost' + py2_py3_compatible_cost(cost)
out_file = os.path.join(opts.output_path,
'cls' + cls_cost + '_' + suffix + '.pickle')
return out_file
def get_low_shot_svm_classes(targets, dataset):
# classes for which SVM testing should be done
num_classes, cls_list = None, None
if dataset == 'voc':
num_classes = targets.shape[1]
cls_list = range(num_classes)
elif dataset == 'places':
# each image in places has a target cls [0, .... ,204]
num_classes = len(set(targets[:, 0].tolist()))
cls_list = list(set(targets[:, 0].tolist()))
else:
logger.info('Dataset not recognized. Abort!')
#logger.info('Testing SVM for classes: {}'.format(cls_list))
#logger.info('Num classes: {}'.format(num_classes))
return num_classes, cls_list
def get_cls_feats_labels(cls, features, targets, dataset):
out_feats, out_cls_labels = None, None
if dataset == 'voc':
cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
# find the indices for positive/negative imgs. Remove the ignore label.
out_data_inds = (targets[:, cls] != -1)
out_feats = features[out_data_inds]
out_cls_labels = cls_labels[out_data_inds]
# label 0 = not present, set it to -1 as svm train target.
# Make the svm train target labels as -1, 1.
out_cls_labels[np.where(out_cls_labels == 0)] = -1
elif dataset == 'places':
out_feats = features
out_cls_labels = targets.astype(dtype=np.int32, copy=True)
# for the given class, get the relevant positive/negative images and
# make the label 1, -1
cls_inds = np.where(targets[:, 0] == cls)
non_cls_inds = (targets[:, 0] != cls)
out_cls_labels[non_cls_inds] = -1
out_cls_labels[cls_inds] = 1
# finally reshape into the format taken by sklearn svm package.
out_cls_labels = out_cls_labels.reshape(-1)
else:
raise Exception('args.dataset not recognized')
return out_feats, out_cls_labels
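
Two of the helpers above deserve a quick worked example (editorial, not part of the module): parse_cost_list appends a geometric grid of sixteen extra costs (0.5**4 through 0.5**19) to whatever the caller passes, and py2_py3_compatible_cost formats a cost to 17 decimal places and back so the cost string embedded in file names is stable across Python 2 and 3:

caller_costs = '1.0,10.0,100.0'
costs = [float(c) for c in caller_costs.split(',')]  # what the caller asked for
costs += [0.5 ** n for n in range(4, 20)]            # grid appended by parse_cost_list
print(len(costs))                                    # -> 19
print(str(float('{:.17f}'.format(0.5 ** 4))))        # -> 0.0625, the tag used in file names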

@@ -1,174 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
SVM test for image classification.
Relevant transfer tasks: Image Classification VOC07 and COCO2014.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import argparse
import json
import logging
import numpy as np
import os
import pickle
import six
import sys
import svm_helper
# create the logger
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def get_chosen_costs(opts, num_classes):
costs_list = svm_helper.parse_cost_list(opts.costs_list)
train_ap_matrix = np.zeros((num_classes, len(costs_list)))
for cls in range(num_classes):
for cost_idx in range(len(costs_list)):
cost = costs_list[cost_idx]
_, ap_out_file = svm_helper.get_svm_train_output_files(
cls, cost, opts.output_path)
train_ap_matrix[cls][cost_idx] = float(
np.load(ap_out_file, encoding='latin1')[0])
argmax_cls = np.argmax(train_ap_matrix, axis=1)
chosen_cost = [costs_list[idx] for idx in argmax_cls]
#logger.info('chosen_cost: {}'.format(chosen_cost))
np.save(
os.path.join(opts.output_path, 'crossval_ap.npy'),
np.array(train_ap_matrix))
np.save(
os.path.join(opts.output_path, 'chosen_cost.npy'),
np.array(chosen_cost))
#logger.info('saved crossval_ap AP to file: {}'.format(
# os.path.join(opts.output_path, 'crossval_ap.npy')))
#logger.info('saved chosen costs to file: {}'.format(
# os.path.join(opts.output_path, 'chosen_cost.npy')))
return np.array(chosen_cost)
def load_json(file_path):
assert os.path.exists(file_path), "{} does not exist".format(file_path)
with open(file_path, 'r') as fp:
data = json.load(fp)
img_ids = list(data.keys())
cls_names = list(data[img_ids[0]].keys())
return img_ids, cls_names
def test_svm(opts):
assert os.path.exists(opts.data_file), "Data file not found. Abort!"
json_predictions, img_ids, cls_names = {}, [], []
if opts.generate_json:
img_ids, cls_names = load_json(opts.json_targets)
features, targets = svm_helper.load_input_data(opts.data_file,
opts.targets_data_file)
# normalize the features: N x 9216 (example shape)
features = svm_helper.normalize_features(features)
num_classes = targets.shape[1]
#logger.info('Num classes: {}'.format(num_classes))
# get the chosen cost that maximizes the cross-validation AP per class
costs_list = get_chosen_costs(opts, num_classes)
ap_matrix = np.zeros((num_classes, 1))
for cls in range(num_classes):
cost = costs_list[cls]
#logger.info('Testing model for cls: {} cost: {}'.format(cls, cost))
model_file = os.path.join(
opts.output_path,
'cls' + str(cls) + '_cost' + str(cost) + '.pickle')
with open(model_file, 'rb') as fopen:
if six.PY2:
model = pickle.load(fopen)
else:
model = pickle.load(fopen, encoding='latin1')
prediction = model.decision_function(features)
if opts.generate_json:
cls_name = cls_names[cls]
for idx in range(len(prediction)):
img_id = img_ids[idx]
if img_id in json_predictions:
json_predictions[img_id][cls_name] = prediction[idx]
else:
out_lbl = {}
out_lbl[cls_name] = prediction[idx]
json_predictions[img_id] = out_lbl
cls_labels = targets[:, cls]
# meaning of labels in VOC/COCO original loaded target files:
# label 0 = not present, set it to -1 as svm train target
# label 1 = present. Make the svm train target labels as -1, 1.
evaluate_data_inds = (targets[:, cls] != -1)
eval_preds = prediction[evaluate_data_inds]
eval_cls_labels = cls_labels[evaluate_data_inds]
eval_cls_labels[np.where(eval_cls_labels == 0)] = -1
P, R, score, ap = svm_helper.get_precision_recall(
eval_cls_labels, eval_preds)
ap_matrix[cls][0] = ap
if opts.generate_json:
output_file = os.path.join(opts.output_path, 'json_preds.json')
with open(output_file, 'w') as fp:
json.dump(json_predictions, fp)
#logger.info('Saved json predictions to: {}'.format(output_file))
logger.info('Mean AP: {}'.format(np.mean(ap_matrix, axis=0)))
np.save(os.path.join(opts.output_path, 'test_ap.npy'), np.array(ap_matrix))
#logger.info('saved test AP to file: {}'.format(
# os.path.join(opts.output_path, 'test_ap.npy')))
def main():
parser = argparse.ArgumentParser(description='SVM model test')
parser.add_argument(
'--data_file',
type=str,
default=None,
help="Numpy file containing image features and labels")
parser.add_argument(
'--json_targets',
type=str,
default=None,
help="Json file containing json targets")
parser.add_argument(
'--targets_data_file',
type=str,
default=None,
help="Numpy file containing image labels")
parser.add_argument(
'--costs_list',
type=str,
default="0.01,0.1",
help="comma separated string containing list of costs")
parser.add_argument(
'--output_path',
type=str,
default=None,
help="path where trained SVM models are saved")
parser.add_argument(
'--generate_json',
type=int,
default=0,
help="Whether to generate json files for output")
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
#logger.info(opts)
test_svm(opts)
if __name__ == '__main__':
main()

@@ -1,212 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
SVM test for low shot image classification.
Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
shot samples.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import argparse
import json
import logging
import numpy as np
import os
import pickle
import six
import sys
import svm_helper
# create the logger
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def load_json(file_path):
assert os.path.exists(file_path), "{} does not exist".format(file_path)
with open(file_path, 'r') as fp:
data = json.load(fp)
img_ids = list(data.keys())
cls_names = list(data[img_ids[0]].keys())
return img_ids, cls_names
def save_json_predictions(opts, cost, sample_idx, k_low, features, cls_list,
cls_names, img_ids):
num_classes = len(cls_list)
json_predictions = {}
for cls in range(num_classes):
suffix = 'sample{}_k{}'.format(sample_idx + 1, k_low)
model_file = svm_helper.get_low_shot_output_file(
opts, cls, cost, suffix)
with open(model_file, 'rb') as fopen:
if six.PY2:
model = pickle.load(fopen)
else:
model = pickle.load(fopen, encoding='latin1')
prediction = model.decision_function(features)
cls_name = cls_names[cls]
for idx in range(len(prediction)):
img_id = img_ids[idx]
if img_id in json_predictions:
json_predictions[img_id][cls_name] = prediction[idx]
else:
out_lbl = {}
out_lbl[cls_name] = prediction[idx]
json_predictions[img_id] = out_lbl
output_file = os.path.join(opts.output_path,
'test_{}_json_preds.json'.format(suffix))
with open(output_file, 'w') as fp:
json.dump(json_predictions, fp)
#logger.info('Saved json predictions to: {}'.format(output_file))
def test_svm_low_shot(opts):
k_values = [int(val) for val in opts.k_values.split(",")]
sample_inds = [int(val) for val in opts.sample_inds.split(",")]
#logger.info('Testing svm for k-values: {} and sample_inds: {}'.format(
# k_values, sample_inds))
img_ids, cls_names = [], []
if opts.generate_json:
img_ids, cls_names = load_json(opts.json_targets)
assert os.path.exists(opts.data_file), "Data file not found. Abort!"
# we test the svms on the full test set. Given the test features and the
# targets, we test it for various k-values (low-shot), cost values and
# 5 independent samples.
features, targets = svm_helper.load_input_data(opts.data_file,
opts.targets_data_file)
# normalize the features: N x 9216 (example shape)
features = svm_helper.normalize_features(features)
# parse the cost values for training the SVM on
costs_list = svm_helper.parse_cost_list(opts.costs_list)
#logger.info('Testing SVM for costs: {}'.format(costs_list))
# classes for which SVM testing should be done
num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
targets, opts.dataset)
# create the output for per sample, per k-value and per cost.
sample_ap_matrices = []
for _ in range(len(sample_inds)):
ap_matrix = np.zeros((len(k_values), len(costs_list)))
sample_ap_matrices.append(ap_matrix)
# the test goes like this: For a given sample, for a given k-value and a
# given cost value, we evaluate the trained svm model for all classes.
# After computing over all classes, we get the mean AP value over all
# classes. We hence end up with: output = [sample][k_value][cost]
for inds in range(len(sample_inds)):
sample_idx = sample_inds[inds]
for k_idx in range(len(k_values)):
k_low = k_values[k_idx]
suffix = 'sample{}_k{}'.format(sample_idx + 1, k_low)
for cost_idx in range(len(costs_list)):
cost = costs_list[cost_idx]
local_cost_ap = np.zeros((num_classes, 1))
for cls in cls_list:
#logger.info(
# 'Test sample/k_value/cost/cls: {}/{}/{}/{}'.format(
# sample_idx + 1, k_low, cost, cls))
model_file = svm_helper.get_low_shot_output_file(
opts, cls, cost, suffix)
with open(model_file, 'rb') as fopen:
if six.PY2:
model = pickle.load(fopen)
else:
model = pickle.load(fopen, encoding='latin1')
prediction = model.decision_function(features)
eval_preds, eval_cls_labels = svm_helper.get_cls_feats_labels(
cls, prediction, targets, opts.dataset)
P, R, score, ap = svm_helper.get_precision_recall(
eval_cls_labels, eval_preds)
local_cost_ap[cls][0] = ap
mean_cost_ap = np.mean(local_cost_ap, axis=0)
sample_ap_matrices[inds][k_idx][cost_idx] = mean_cost_ap
out_k_sample_file = os.path.join(
opts.output_path,
'test_ap_sample{}_k{}.npy'.format(sample_idx + 1, k_low))
save_data = sample_ap_matrices[inds][k_idx]
save_data = save_data.reshape((1, -1))
np.save(out_k_sample_file, save_data)
#logger.info('Saved sample test k_idx AP to file: {} {}'.format(
# out_k_sample_file, save_data.shape))
if opts.generate_json:
argmax_cls = np.argmax(save_data, axis=1)
chosen_cost = costs_list[argmax_cls[0]]
#logger.info('chosen cost: {}'.format(chosen_cost))
save_json_predictions(opts, chosen_cost, sample_idx, k_low,
features, cls_list, cls_names, img_ids)
#logger.info('All done!!')
def main():
parser = argparse.ArgumentParser(description='Low shot SVM model test')
parser.add_argument(
'--data_file',
type=str,
default=None,
help="Numpy file containing image features and labels")
parser.add_argument(
'--targets_data_file',
type=str,
default=None,
help="Numpy file containing image labels")
parser.add_argument(
'--json_targets',
type=str,
default=None,
help="Numpy file containing json targets")
parser.add_argument(
'--generate_json',
type=int,
default=0,
help="Whether to generate json files for output")
parser.add_argument(
'--costs_list',
type=str,
default=
"0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1,1.0,10.0,100.0",
help="comma separated string containing list of costs")
parser.add_argument(
'--output_path',
type=str,
default=None,
help="path where trained SVM models are saved")
parser.add_argument(
'--k_values',
type=str,
default="1,2,4,8,16,32,64,96",
help="Low-shot k-values for svm testing. Comma separated")
parser.add_argument(
'--sample_inds',
type=str,
default="0,1,2,3,4",
help="sample_inds for which to test svm. Comma separated")
parser.add_argument(
'--dataset', type=str, default="voc", help='voc | places')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
#logger.info(opts)
test_svm_low_shot(opts)
if __name__ == '__main__':
main()

@@ -1,162 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
SVM training using 3-fold cross-validation.
Relevant transfer tasks: Image Classification VOC07 and COCO2014.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import argparse
import logging
import numpy as np
import os
import pickle
import sys
from tqdm import tqdm
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
import svm_helper
import time
# create the logger
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def train_svm(opts):
assert os.path.exists(opts.data_file), "Data file not found. Abort!"
if not os.path.exists(opts.output_path):
os.makedirs(opts.output_path)
features, targets = svm_helper.load_input_data(opts.data_file,
opts.targets_data_file)
# normalize the features: N x 9216 (example shape)
features = svm_helper.normalize_features(features)
# parse the cost values for training the SVM on
costs_list = svm_helper.parse_cost_list(opts.costs_list)
#logger.info('Training SVM for costs: {}'.format(costs_list))
# classes for which SVM training should be done
if opts.cls_list:
cls_list = [int(cls) for cls in opts.cls_list.split(",")]
else:
num_classes = targets.shape[1]
cls_list = range(num_classes)
#logger.info('Training SVM for classes: {}'.format(cls_list))
for cls_idx in tqdm(range(len(cls_list))):
cls = cls_list[cls_idx]
for cost_idx in range(len(costs_list)):
start = time.time()
cost = costs_list[cost_idx]
out_file, ap_out_file = svm_helper.get_svm_train_output_files(
cls, cost, opts.output_path)
if os.path.exists(out_file) and os.path.exists(ap_out_file):
logger.info('SVM model exists: {}'.format(out_file))
logger.info('AP file exists: {}'.format(ap_out_file))
else:
#logger.info('Training model with the cost: {}'.format(cost))
clf = LinearSVC(
C=cost,
class_weight={
1: 2,
-1: 1
},
intercept_scaling=1.0,
verbose=0,
penalty='l2',
loss='squared_hinge',
tol=0.0001,
dual=True,
max_iter=2000,
)
cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
# meaning of labels in VOC/COCO original loaded target files:
# label 0 = not present, set it to -1 as svm train target
# label 1 = present. Make the svm train target labels as -1, 1.
cls_labels[np.where(cls_labels == 0)] = -1
#num_positives = len(np.where(cls_labels == 1)[0])
#num_negatives = len(cls_labels) - num_positives
#logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
# cls, num_positives, num_negatives,
# float(num_positives) / num_negatives)
#)
#logger.info('features: {} cls_labels: {}'.format(
# features.shape, cls_labels.shape))
ap_scores = cross_val_score(
clf,
features,
cls_labels,
cv=3,
scoring='average_precision')
clf.fit(features, cls_labels)
#logger.info('cls: {} cost: {} AP: {} mean:{}'.format(
# cls, cost, ap_scores, ap_scores.mean()))
#logger.info('Saving cls cost AP to: {}'.format(ap_out_file))
np.save(ap_out_file, np.array([ap_scores.mean()]))
#logger.info('Saving SVM model to: {}'.format(out_file))
with open(out_file, 'wb') as fwrite:
pickle.dump(clf, fwrite)
print("time: {:.4g} s".format(time.time() - start))
def main():
parser = argparse.ArgumentParser(description='SVM model training')
parser.add_argument(
'--data_file',
type=str,
default=None,
help="Numpy file containing image features")
parser.add_argument(
'--targets_data_file',
type=str,
default=None,
help="Numpy file containing image labels")
parser.add_argument(
'--output_path',
type=str,
default=None,
help="path where to save the trained SVM models")
parser.add_argument(
'--costs_list',
type=str,
default="0.01,0.1",
help="comma separated string containing list of costs")
parser.add_argument(
'--random_seed',
type=int,
default=100,
help="random seed for SVM classifier training")
parser.add_argument(
'--cls_list',
type=str,
default=None,
help="comma separated string list of classes to train")
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
#logger.info(opts)
train_svm(opts)
if __name__ == '__main__':
main()

@@ -1,151 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
SVM training using 3-fold cross-validation.
Relevant transfer tasks: Image Classification VOC07 and COCO2014.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import multiprocessing as mp
import tqdm
import argparse
import logging
import numpy as np
import os
import pickle
import sys
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
import svm_helper
import pdb
def task(cls, cost, opts, features, targets):
out_file, ap_out_file = svm_helper.get_svm_train_output_files(
cls, cost, opts.output_path)
if not (os.path.exists(out_file) and os.path.exists(ap_out_file)):
clf = LinearSVC(
C=cost,
class_weight={
1: 2,
-1: 1
},
intercept_scaling=1.0,
verbose=0,
penalty='l2',
loss='squared_hinge',
tol=0.0001,
dual=True,
max_iter=2000,
)
cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
cls_labels[np.where(cls_labels == 0)] = -1
ap_scores = cross_val_score(
clf, features, cls_labels, cv=3, scoring='average_precision')
clf.fit(features, cls_labels)
np.save(ap_out_file, np.array([ap_scores.mean()]))
with open(out_file, 'wb') as fwrite:
pickle.dump(clf, fwrite)
return 0
def mp_helper(args):
return task(*args)
def train_svm(opts):
assert os.path.exists(opts.data_file), "Data file not found. Abort!"
if not os.path.exists(opts.output_path):
os.makedirs(opts.output_path)
features, targets = svm_helper.load_input_data(opts.data_file,
opts.targets_data_file)
# normalize the features: N x 9216 (example shape)
features = svm_helper.normalize_features(features)
# parse the cost values for training the SVM on
costs_list = svm_helper.parse_cost_list(opts.costs_list)
# classes for which SVM training should be done
if opts.cls_list:
cls_list = [int(cls) for cls in opts.cls_list.split(",")]
else:
num_classes = targets.shape[1]
cls_list = range(num_classes)
num_task = len(cls_list) * len(costs_list)
args_cls = []
args_cost = []
for cls in cls_list:
for cost in costs_list:
args_cls.append(cls)
args_cost.append(cost)
args_opts = [opts] * num_task
args_features = [features] * num_task
args_targets = [targets] * num_task
pool = mp.Pool(mp.cpu_count())
for _ in tqdm.tqdm(
pool.imap_unordered(
mp_helper,
zip(args_cls, args_cost, args_opts, args_features,
args_targets)),
total=num_task):
pass
def main():
parser = argparse.ArgumentParser(description='SVM model training')
parser.add_argument(
'--data_file',
type=str,
default=None,
help="Numpy file containing image features")
parser.add_argument(
'--targets_data_file',
type=str,
default=None,
help="Numpy file containing image labels")
parser.add_argument(
'--output_path',
type=str,
default=None,
help="path where to save the trained SVM models")
parser.add_argument(
'--costs_list',
type=str,
default="0.01,0.1",
help="comma separated string containing list of costs")
parser.add_argument(
'--random_seed',
type=int,
default=100,
help="random seed for SVM classifier training")
parser.add_argument(
'--cls_list',
type=str,
default=None,
help="comma separated string list of classes to train")
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
train_svm(opts)
if __name__ == '__main__':
main()

@@ -1,144 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
Low Shot SVM training.
Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
shot samples.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import argparse
import logging
import numpy as np
import os
import pickle
import sys
from sklearn.svm import LinearSVC
from tqdm import tqdm
import svm_helper
import time
# create the logger
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def train_svm_low_shot(opts):
assert os.path.exists(opts.data_file), "Data file not found. Abort!"
if not os.path.exists(opts.output_path):
os.makedirs(opts.output_path)
features, targets = svm_helper.load_input_data(opts.data_file,
opts.targets_data_file)
# normalize the features: N x 9216 (example shape)
features = svm_helper.normalize_features(features)
# parse the cost values for training the SVM on
costs_list = svm_helper.parse_cost_list(opts.costs_list)
#logger.info('Training SVM for costs: {}'.format(costs_list))
# classes for which SVM testing should be done
num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
targets, opts.dataset)
for cls in tqdm(cls_list):
for cost_idx in range(len(costs_list)):
start = time.time()
cost = costs_list[cost_idx]
suffix = '_'.join(
opts.targets_data_file.split('/')[-1].split('.')[0].split('_')
[-2:])
out_file = svm_helper.get_low_shot_output_file(
opts, cls, cost, suffix)
if os.path.exists(out_file):
logger.info('SVM model exists: {}'.format(out_file))
else:
#logger.info('SVM model not found: {}'.format(out_file))
#logger.info('Training model with the cost: {}'.format(cost))
clf = LinearSVC(
C=cost,
class_weight={
1: 2,
-1: 1
},
intercept_scaling=1.0,
verbose=0,
penalty='l2',
loss='squared_hinge',
tol=0.0001,
dual=True,
max_iter=2000,
)
train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
cls, features, targets, opts.dataset)
#num_positives = len(np.where(train_cls_labels == 1)[0])
#num_negatives = len(np.where(train_cls_labels == -1)[0])
#logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
# cls, num_positives, num_negatives,
# float(num_positives) / num_negatives)
#)
#logger.info('features: {} cls_labels: {}'.format(
# train_feats.shape, train_cls_labels.shape))
clf.fit(train_feats, train_cls_labels)
#logger.info('Saving SVM model to: {}'.format(out_file))
with open(out_file, 'wb') as fwrite:
pickle.dump(clf, fwrite)
#print("time: {:.4g} s".format(time.time() - start))
#logger.info('All done!')
def main():
parser = argparse.ArgumentParser(description='Low-shot SVM model training')
parser.add_argument(
'--data_file',
type=str,
default=None,
help="Numpy file containing image features")
parser.add_argument(
'--targets_data_file',
type=str,
default=None,
help="Numpy file containing image labels")
parser.add_argument(
'--costs_list',
type=str,
default="0.01,0.1",
help="comma separated string containing list of costs")
parser.add_argument(
'--output_path',
type=str,
default=None,
help="path where to save the trained SVM models")
parser.add_argument(
'--random_seed',
type=int,
default=100,
help="random seed for SVM classifier training")
parser.add_argument(
'--dataset', type=str, default="voc", help='voc | places')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
#logger.info(opts)
train_svm_low_shot(opts)
if __name__ == '__main__':
main()
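
A detail worth spelling out (editorial note): train_svm_low_shot derives the sample/k suffix of its output files from the name of the targets file itself, so the low-shot label files must follow the train_targets_sample{s}_k{k}.npy pattern used by the evaluation scripts. The slicing works like this:

path = 'data/VOCdevkit/VOC2007/SVMLabels/low_shot/labels/train_targets_sample1_k8.npy'
suffix = '_'.join(path.split('/')[-1].split('.')[0].split('_')[-2:])
print(suffix)  # -> sample1_k8 (yielding output names such as cls0_cost0.01_sample1_k8.pickle)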

@@ -1,145 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
################################################################################
"""
Low Shot SVM training.
Relevant transfer tasks: Low-shot Image Classification VOC07 and Places205 low
shot samples.
"""
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
import multiprocessing as mp
import tqdm
import argparse
import logging
import numpy as np
import os
import pickle
import sys
from sklearn.svm import LinearSVC
import svm_helper
import pdb
def task(cls, cost, opts, features, targets):
suffix = '_'.join(
opts.targets_data_file.split('/')[-1].split('.')[0].split('_')[-2:])
out_file = svm_helper.get_low_shot_output_file(opts, cls, cost, suffix)
if not os.path.exists(out_file):
clf = LinearSVC(
C=cost,
class_weight={
1: 2,
-1: 1
},
intercept_scaling=1.0,
verbose=0,
penalty='l2',
loss='squared_hinge',
tol=0.0001,
dual=True,
max_iter=2000,
)
train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
cls, features, targets, opts.dataset)
clf.fit(train_feats, train_cls_labels)
#cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
#cls_labels[np.where(cls_labels == 0)] = -1
#clf.fit(features, cls_labels)
with open(out_file, 'wb') as fwrite:
pickle.dump(clf, fwrite)
return 0
def mp_helper(args):
return task(*args)
def train_svm_low_shot(opts):
assert os.path.exists(opts.data_file), "Data file not found. Abort!"
if not os.path.exists(opts.output_path):
os.makedirs(opts.output_path)
features, targets = svm_helper.load_input_data(opts.data_file,
opts.targets_data_file)
# normalize the features: N x 9216 (example shape)
features = svm_helper.normalize_features(features)
# parse the cost values for training the SVM on
costs_list = svm_helper.parse_cost_list(opts.costs_list)
# classes for which SVM testing should be done
num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
targets, opts.dataset)
num_task = len(cls_list) * len(costs_list)
args_cls = []
args_cost = []
for cls in cls_list:
for cost in costs_list:
args_cls.append(cls)
args_cost.append(cost)
args_opts = [opts] * num_task
args_features = [features] * num_task
args_targets = [targets] * num_task
pool = mp.Pool(mp.cpu_count())
for _ in tqdm.tqdm(
pool.imap_unordered(
mp_helper,
zip(args_cls, args_cost, args_opts, args_features,
args_targets)),
total=num_task):
pass
def main():
parser = argparse.ArgumentParser(description='Low-shot SVM model training')
parser.add_argument(
'--data_file',
type=str,
default=None,
help="Numpy file containing image features")
parser.add_argument(
'--targets_data_file',
type=str,
default=None,
help="Numpy file containing image labels")
parser.add_argument(
'--costs_list',
type=str,
default="0.01,0.1",
help="comma separated string containing list of costs")
parser.add_argument(
'--output_path',
type=str,
default=None,
help="path where to save the trained SVM models")
parser.add_argument(
'--random_seed',
type=int,
default=100,
help="random seed for SVM classifier training")
parser.add_argument(
'--dataset', type=str, default="voc", help='voc | places')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
opts = parser.parse_args()
train_svm_low_shot(opts)
if __name__ == '__main__':
main()
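# Hedged sketch of the per-class, per-cost SVM that train_svm_low_shot() fits
# above, on random toy data; 'features' and 'cls_labels' below are synthetic
# stand-ins for the .npy inputs the script expects, not files from this repo.
import numpy as np
from sklearn.svm import LinearSVC

rng = np.random.RandomState(0)
features = rng.randn(40, 16)                       # N x D feature matrix
cls_labels = np.where(rng.rand(40) > 0.5, 1, -1)   # binary +1 / -1 labels
clf = LinearSVC(C=0.01, class_weight={1: 2, -1: 1}, penalty='l2',
                loss='squared_hinge', tol=0.0001, dual=True, max_iter=2000)
clf.fit(features, cls_labels)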

View File

@ -1,76 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=False,
backbone=dict(
type='ResNet',
depth=50,
in_channels=3,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='BN'),
frozen_stages=4),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32*8=256, 8GPU linear cls
workers_per_gpu=5,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=1,
imgs_per_gpu=128,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, 5)))
]
# optimizer
optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)
# learning policy
lr_config = dict(policy='step', step=[60, 80])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 100

View File

@ -1,76 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=True,
backbone=dict(
type='ResNet',
depth=50,
in_channels=2,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='BN'),
frozen_stages=4),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32*8=256, 8GPU linear cls
workers_per_gpu=5,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=1,
imgs_per_gpu=128,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, 5)))
]
# optimizer
optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)
# learning policy
lr_config = dict(policy='step', step=[60, 80])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 100

View File

@ -1,89 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=False,
backbone=dict(
type='ResNet',
depth=50,
in_channels=3,
out_indices=[0, 1, 2, 3, 4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='BN'),
frozen_stages=4),
head=dict(
type='MultiClsHead',
pool_type='specified',
in_indices=[0, 1, 2, 3, 4],
with_last_layer_unpool=False,
backbone='resnet50',
norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(
type='ColorJitter',
brightness=0.4,
contrast=0.4,
saturation=0.4,
hue=0.),
dict(type='ToTensor'),
dict(type='Lighting'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=32, # total 32x8=256
workers_per_gpu=5,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=10,
imgs_per_gpu=128,
workers_per_gpu=4,
eval_param=dict(topk=(1, )))
]
# optimizer
optimizer = dict(
type='SGD',
lr=0.01,
momentum=0.9,
weight_decay=0.0001,
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[30, 60, 90])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 90

View File

@ -1,89 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=True,
backbone=dict(
type='ResNet',
depth=50,
in_channels=2,
out_indices=[0, 1, 2, 3, 4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='BN'),
frozen_stages=4),
head=dict(
type='MultiClsHead',
pool_type='specified',
in_indices=[0, 1, 2, 3, 4],
with_last_layer_unpool=False,
backbone='resnet50',
norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(
type='ColorJitter',
brightness=0.4,
contrast=0.4,
saturation=0.4,
hue=0.),
dict(type='ToTensor'),
dict(type='Lighting'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=32, # total 32x8=256
workers_per_gpu=5,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=10,
imgs_per_gpu=128,
workers_per_gpu=4,
eval_param=dict(topk=(1, )))
]
# optimizer
optimizer = dict(
type='SGD',
lr=0.01,
momentum=0.9,
weight_decay=0.0001,
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[30, 60, 90])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 90

View File

@ -1,89 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=False,
backbone=dict(
type='ResNet',
depth=50,
in_channels=3,
out_indices=[0, 1, 2, 3, 4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='BN'),
frozen_stages=4),
head=dict(
type='MultiClsHead',
pool_type='specified',
in_indices=[0, 1, 2, 3, 4],
with_last_layer_unpool=False,
backbone='resnet50',
norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
num_classes=205))
# dataset settings
data_source_cfg = dict(
type='Places205',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/places205/meta/train_labeled.txt'
data_train_root = 'data/places205/train'
data_test_list = 'data/places205/meta/val_labeled.txt'
data_test_root = 'data/places205/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=256),
dict(type='RandomCrop', size=224),
dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32x8=256
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=10,
imgs_per_gpu=32,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, )))
]
# optimizer
optimizer = dict(
type='SGD',
lr=0.01,
momentum=0.9,
weight_decay=0.0001,
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[7, 14, 21])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 28

View File

@ -1,89 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=True,
backbone=dict(
type='ResNet',
depth=50,
in_channels=2,
out_indices=[0, 1, 2, 3, 4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='BN'),
frozen_stages=4),
head=dict(
type='MultiClsHead',
pool_type='specified',
in_indices=[0, 1, 2, 3, 4],
with_last_layer_unpool=False,
backbone='resnet50',
norm_cfg=dict(type='SyncBN', momentum=0.1, affine=False),
num_classes=205))
# dataset settings
data_source_cfg = dict(
type='Places205',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/places205/meta/train_labeled.txt'
data_train_root = 'data/places205/train'
data_test_list = 'data/places205/meta/val_labeled.txt'
data_test_root = 'data/places205/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=256),
dict(type='RandomCrop', size=224),
dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
]
# prefetch
prefetch = False
if not prefetch:
train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
data = dict(
imgs_per_gpu=32, # total 32x8=256
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline,
prefetch=prefetch),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline,
prefetch=prefetch))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=10,
imgs_per_gpu=32,
workers_per_gpu=4,
prefetch=prefetch,
img_norm_cfg=img_norm_cfg,
eval_param=dict(topk=(1, )))
]
# optimizer
optimizer = dict(
type='SGD',
lr=0.01,
momentum=0.9,
weight_decay=0.0001,
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[7, 14, 21])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 28

View File

@ -1,66 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
backbone=dict(
type='ResNet',
depth=50,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='SyncBN')),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled_10percent.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=64, # total 256
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=False,
interval=20,
imgs_per_gpu=32,
workers_per_gpu=2,
eval_param=dict(topk=(1, 5)))
]
# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
checkpoint_config = dict(interval=20)
# runtime settings
total_epochs = 20

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=1)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=10)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=100)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=1)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=10)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=100)})

View File

@ -1,71 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=True,
backbone=dict(
type='ResNet',
depth=50,
in_channels=2,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='SyncBN')),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled_10percent.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=64, # total 256
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=False,
interval=20,
imgs_per_gpu=32,
workers_per_gpu=2,
eval_param=dict(topk=(1, 5)))
]
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=1)})
# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
checkpoint_config = dict(interval=20)
# runtime settings
total_epochs = 20

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=1)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=10)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001,
paramwise_options={'\Ahead.': dict(lr_mult=100)})

View File

@ -1,72 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
backbone=dict(
type='ResNet',
depth=50,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='SyncBN')),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled_1percent.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=64, # total 256
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=False,
interval=20,
imgs_per_gpu=32,
workers_per_gpu=2,
eval_param=dict(topk=(1, 5)))
]
# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
checkpoint_config = dict(interval=20)
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# runtime settings
total_epochs = 20

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=1)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=10)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=100)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=1)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=10)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=100)})

View File

@ -1,77 +0,0 @@
_base_ = '../../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
with_sobel=True,
backbone=dict(
type='ResNet',
depth=50,
in_channels=2,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='SyncBN')),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled_1percent.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=64, # total 256
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=False,
interval=20,
imgs_per_gpu=32,
workers_per_gpu=2,
eval_param=dict(topk=(1, 5)))
]
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=1)})
# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
checkpoint_config = dict(interval=20)
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# runtime settings
total_epochs = 20

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=1)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=10)})

View File

@ -1,4 +0,0 @@
_base_ = 'base.py'
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005,
paramwise_options={'\Ahead.': dict(lr_mult=100)})

View File

@ -1,59 +0,0 @@
_base_ = '../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
backbone=dict(
type='ResNet',
depth=50,
out_indices=[4], # 4: stage-4
norm_cfg=dict(type='BN')),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048, num_classes=10))
# dataset settings
data_source_cfg = dict(type='Cifar10', root='data/cifar/')
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.201])
train_pipeline = [
dict(type='RandomCrop', size=32, padding=4),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=128,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_source=dict(split='train', **data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(split='test', **data_source_cfg),
pipeline=test_pipeline),
test=dict(
type=dataset_type,
data_source=dict(split='test', **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=10,
imgs_per_gpu=128,
workers_per_gpu=8,
eval_param=dict(topk=(1, 5)))
]
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005)
# learning policy
lr_config = dict(policy='step', step=[150, 250])
checkpoint_config = dict(interval=50)
# runtime settings
total_epochs = 350

View File

@ -1,68 +0,0 @@
_base_ = '../../base.py'
# model settings
model = dict(
type='Classification',
pretrained=None,
backbone=dict(
type='ResNet',
depth=50,
out_indices=[4], # 0: conv-1, x: stage-x
norm_cfg=dict(type='SyncBN')),
head=dict(
type='ClsHead', with_avg_pool=True, in_channels=2048,
num_classes=1000))
# dataset settings
data_source_cfg = dict(
type='ImageNet',
memcached=True,
mclient_path='/mnt/lustre/share/memcached_client')
data_train_list = 'data/imagenet/meta/train_labeled.txt'
data_train_root = 'data/imagenet/train'
data_test_list = 'data/imagenet/meta/val_labeled.txt'
data_test_root = 'data/imagenet/val'
dataset_type = 'ClassificationDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
test_pipeline = [
dict(type='Resize', size=256),
dict(type='CenterCrop', size=224),
dict(type='ToTensor'),
dict(type='Normalize', **img_norm_cfg),
]
data = dict(
imgs_per_gpu=32, # total 256
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_source=dict(
list_file=data_train_list, root=data_train_root,
**data_source_cfg),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_source=dict(
list_file=data_test_list, root=data_test_root, **data_source_cfg),
pipeline=test_pipeline))
# additional hooks
custom_hooks = [
dict(
type='ValidateHook',
dataset=data['val'],
initial=True,
interval=10,
imgs_per_gpu=32,
workers_per_gpu=2,
eval_param=dict(topk=(1, 5)))
]
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
# learning policy
lr_config = dict(policy='step', step=[30, 60, 90])
checkpoint_config = dict(interval=10)
# runtime settings
total_epochs = 90

View File

@ -1,3 +1,60 @@
from .version import __version__, short_version
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
__all__ = ['__version__', 'short_version']
import mmcv
from packaging.version import parse
from .version import __version__
def digit_version(version_str: str, length: int = 4):
"""Convert a version string into a tuple of integers.
This method is usually used for comparing two versions. For pre-release
versions: alpha < beta < rc.
Args:
version_str (str): The version string.
length (int): The maximum number of version levels. Defaults to 4.
Returns:
tuple[int]: The version info in digits (integers).
"""
version = parse(version_str)
assert version.release, f'failed to parse version {version_str}'
release = list(version.release)
release = release[:length]
if len(release) < length:
release = release + [0] * (length - len(release))
if version.is_prerelease:
mapping = {'a': -3, 'b': -2, 'rc': -1}
val = -4
# version.pre can be None
if version.pre:
if version.pre[0] not in mapping:
warnings.warn(f'unknown prerelease version {version.pre[0]}, '
'version checking may go wrong')
else:
val = mapping[version.pre[0]]
release.extend([val, version.pre[-1]])
else:
release.extend([val, 0])
elif version.is_postrelease:
release.extend([1, version.post])
else:
release.extend([0, 0])
return tuple(release)
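# Illustrative use of digit_version(), following the docstring above; the
# version strings are arbitrary examples.
assert digit_version('1.3.16') == (1, 3, 16, 0, 0, 0)
assert digit_version('1.0.0rc1') == (1, 0, 0, 0, -1, 1)
# pre-releases sort below the corresponding final release:
assert digit_version('1.0.0rc1') < digit_version('1.0.0')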
mmcv_minimum_version = '1.3.16'
mmcv_maximum_version = '1.5.0'
mmcv_version = digit_version(mmcv.__version__)
assert (mmcv_version >= digit_version(mmcv_minimum_version)
and mmcv_version <= digit_version(mmcv_maximum_version)), \
f'MMCV=={mmcv.__version__} is used but incompatible. ' \
f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
__all__ = ['__version__', 'digit_version']

View File

@ -0,0 +1,3 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .hooks import * # noqa: F401,F403
from .optimizer import * # noqa: F401, F403

View File

@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .byol_hook import BYOLHook
from .deepcluster_hook import DeepClusterHook
from .densecl_hook import DenseCLHook
from .odc_hook import ODCHook
from .optimizer_hook import DistOptimizerHook, GradAccumFp16OptimizerHook
from .simsiam_hook import SimSiamHook
from .swav_hook import SwAVHook
__all__ = [
'BYOLHook', 'DeepClusterHook', 'DenseCLHook', 'ODCHook',
'DistOptimizerHook', 'GradAccumFp16OptimizerHook', 'SimSiamHook',
'SwAVHook'
]

View File

@ -1,21 +1,26 @@
# Copyright (c) OpenMMLab. All rights reserved.
from math import cos, pi
from mmcv.runner import Hook
from mmcv.parallel import is_module_wrapper
from .registry import HOOKS
from mmcv.runner import HOOKS, Hook
@HOOKS.register_module
@HOOKS.register_module()
class BYOLHook(Hook):
"""Hook for BYOL.
This hook includes momentum adjustment in BYOL following:
m = 1 - ( 1- m_0) * (cos(pi * k / K) + 1) / 2,
k: current step, K: total steps.
.. math::
m = 1 - (1 - m_0) * (cos(pi * k / K) + 1) / 2
where :math:`k` is the current step, :math:`K` is the total steps.
Args:
end_momentum (float): The final momentum coefficient
for the target network. Default: 1.
for the target network. Defaults to 1.
update_interval (int, optional): The momentum update interval of the
weights. Defaults to 1.
"""
def __init__(self, end_momentum=1., update_interval=1, **kwargs):
@ -24,9 +29,9 @@ class BYOLHook(Hook):
def before_train_iter(self, runner):
assert hasattr(runner.model.module, 'momentum'), \
"The runner must have attribute \"momentum\" in BYOLHook."
"The runner must have attribute \"momentum\" in BYOL."
assert hasattr(runner.model.module, 'base_momentum'), \
"The runner must have attribute \"base_momentum\" in BYOLHook."
"The runner must have attribute \"base_momentum\" in BYOL."
if self.every_n_iters(runner, self.update_interval):
cur_iter = runner.iter
max_iter = runner.max_iters
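# Hedged illustration of the momentum schedule documented above; this helper
# is hypothetical and only mirrors the formula, it is not part of BYOLHook.
from math import cos, pi

def byol_momentum(base_momentum, cur_step, max_steps, end_momentum=1.):
    """m = end - (end - m_0) * (cos(pi * k / K) + 1) / 2."""
    return end_momentum - (end_momentum - base_momentum) * (
        cos(pi * cur_step / max_steps) + 1) / 2

# With m_0 = 0.996 the coefficient starts at 0.996 and reaches 1.0 at the end:
assert abs(byol_momentum(0.996, 0, 100) - 0.996) < 1e-12
assert abs(byol_momentum(0.996, 100, 100) - 1.0) < 1e-12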

View File

@ -1,32 +1,32 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmcv.runner import Hook
import torch
import torch.distributed as dist
from mmcv.runner import HOOKS, Hook
from mmcv.utils import print_log
from openselfsup.third_party import clustering as _clustering
from openselfsup.utils import print_log
from .registry import HOOKS
from .extractor import Extractor
from mmselfsup.utils import Extractor
from mmselfsup.utils import clustering as _clustering
@HOOKS.register_module
@HOOKS.register_module()
class DeepClusterHook(Hook):
"""Hook for DeepCluster.
This hook includes the global clustering process in DC.
Args:
extractor (dict): Config dict for feature extraction.
clustering (dict): Config dict that specifies the clustering algorithm.
unif_sampling (bool): Whether to apply uniform sampling.
reweight (bool): Whether to apply loss re-weighting.
reweight_pow (float): The power of re-weighting.
init_memory (bool): Whether to initialize memory banks for ODC.
Default: False.
initial (bool): Whether to call the hook initially. Default: True.
interval (int): Frequency of epochs to call the hook. Default: 1.
dist_mode (bool): Use distributed training or not. Default: True.
data_loaders (DataLoader): A PyTorch dataloader. Default: None.
init_memory (bool): Whether to initialize memory banks used in ODC.
Defaults to False.
initial (bool): Whether to call the hook initially. Defaults to True.
interval (int): Frequency of epochs to call the hook. Defaults to 1.
dist_mode (bool): Use distributed training or not. Defaults to True.
data_loaders (DataLoader): A PyTorch dataloader. Defaults to None.
"""
def __init__(
@ -76,9 +76,8 @@ class DeepClusterHook(Hook):
clustering_algo.cluster(features, verbose=True)
assert isinstance(clustering_algo.labels, np.ndarray)
new_labels = clustering_algo.labels.astype(np.int64)
np.save(
"{}/cluster_epoch_{}.npy".format(runner.work_dir,
runner.epoch), new_labels)
np.save(f'{runner.work_dir}/cluster_epoch_{runner.epoch}.npy',
new_labels)
self.evaluate(runner, new_labels)
else:
new_labels = np.zeros((len(self.data_loaders[0].dataset), ),
@ -103,7 +102,8 @@ class DeepClusterHook(Hook):
runner.model.module.set_reweight(new_labels, self.reweight_pow)
# step 5: randomize classifier
runner.model.module.head.init_weights(init_linear='normal')
runner.model.module.head._is_init = False
runner.model.module.head.init_weights()
if self.dist_mode:
for p in runner.model.module.head.state_dict().values():
dist.broadcast(p, 0)
@ -113,12 +113,12 @@ class DeepClusterHook(Hook):
runner.model.module.memory_bank.init_memory(features, new_labels)
def evaluate(self, runner, new_labels):
hist = np.bincount(new_labels, minlength=self.clustering_cfg.k)
empty_cls = (hist == 0).sum()
minimal_cls_size, maximal_cls_size = hist.min(), hist.max()
histogram = np.bincount(new_labels, minlength=self.clustering_cfg.k)
empty_cls = (histogram == 0).sum()
minimal_cls_size, maximal_cls_size = histogram.min(), histogram.max()
if runner.rank == 0:
print_log(
"empty_num: {}\tmin_cluster: {}\tmax_cluster:{}".format(
empty_cls.item(), minimal_cls_size.item(),
maximal_cls_size.item()),
f'empty_num: {empty_cls.item()}\t'
f'min_cluster: {minimal_cls_size.item()}\t'
f'max_cluster:{maximal_cls_size.item()}',
logger='root')
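# Toy walk-through of the cluster statistics computed in evaluate() above,
# assuming k=4 clusters; the label array is made up for illustration.
import numpy as np
new_labels = np.array([0, 0, 1, 1, 1, 3])
histogram = np.bincount(new_labels, minlength=4)      # [2, 3, 0, 1]
empty_cls = (histogram == 0).sum()                    # 1 (cluster 2 is empty)
minimal_cls_size, maximal_cls_size = histogram.min(), histogram.max()  # 0, 3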

View File

@ -0,0 +1,32 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import HOOKS, Hook
@HOOKS.register_module()
class DenseCLHook(Hook):
"""Hook for DenseCL.
This hook includes ``loss_lambda`` warmup in DenseCL.
Borrowed from the authors' code: `<https://github.com/WXinlong/DenseCL>`_.
Args:
start_iters (int, optional): The number of warm-up iterations during
which ``loss_lambda`` is set to 0. Defaults to 1000.
"""
def __init__(self, start_iters=1000, **kwargs):
self.start_iters = start_iters
def before_run(self, runner):
assert hasattr(runner.model.module, 'loss_lambda'), \
"The runner must have attribute \"loss_lambda\" in DenseCL."
self.loss_lambda = runner.model.module.loss_lambda
def before_train_iter(self, runner):
assert hasattr(runner.model.module, 'loss_lambda'), \
"The runner must have attribute \"loss_lambda\" in DenseCL."
cur_iter = runner.iter
if cur_iter >= self.start_iters:
runner.model.module.loss_lambda = self.loss_lambda
else:
runner.model.module.loss_lambda = 0.
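# Minimal sketch of the loss_lambda warm-up implemented above (the numbers are
# assumptions): the dense loss weight stays at 0 for the first start_iters
# iterations and is restored to its configured value afterwards.
def effective_loss_lambda(cur_iter, start_iters=1000, loss_lambda=0.5):
    return loss_lambda if cur_iter >= start_iters else 0.

assert effective_loss_lambda(0) == 0.
assert effective_loss_lambda(999) == 0.
assert effective_loss_lambda(1000) == 0.5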

View File

@ -1,15 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmcv.runner import Hook
from openselfsup.utils import print_log
from .registry import HOOKS
from mmcv.runner import HOOKS, Hook
from mmcv.utils import print_log
@HOOKS.register_module
@HOOKS.register_module()
class ODCHook(Hook):
"""Hook for ODC.
This hook includes the online clustering process in ODC.
Args:
centroids_update_interval (int): Frequency of iterations
to update centroids.
@ -18,7 +18,7 @@ class ODCHook(Hook):
evaluate_interval (int): Frequency of iterations to evaluate clusters.
reweight (bool): Whether to perform loss re-weighting.
reweight_pow (float): The power of re-weighting.
dist_mode (bool): Use distributed training or not. Default: True.
dist_mode (bool): Use distributed training or not. Defaults to True.
"""
def __init__(self,
@ -28,7 +28,7 @@ class ODCHook(Hook):
reweight,
reweight_pow,
dist_mode=True):
assert dist_mode, "non-dist mode is not implemented"
assert dist_mode, 'non-dist mode is not implemented'
self.centroids_update_interval = centroids_update_interval
self.deal_with_small_clusters_interval = \
deal_with_small_clusters_interval
@ -61,19 +61,17 @@ class ODCHook(Hook):
new_labels = runner.model.module.memory_bank.label_bank
if new_labels.is_cuda:
new_labels = new_labels.cpu()
np.save(
"{}/cluster_epoch_{}.npy".format(runner.work_dir,
runner.epoch),
new_labels.numpy())
np.save(f'{runner.work_dir}/cluster_epoch_{runner.epoch + 1}.npy',
new_labels.numpy())
def evaluate(self, runner, new_labels):
hist = np.bincount(
histogram = np.bincount(
new_labels, minlength=runner.model.module.memory_bank.num_classes)
empty_cls = (hist == 0).sum()
minimal_cls_size, maximal_cls_size = hist.min(), hist.max()
empty_cls = (histogram == 0).sum()
minimal_cls_size, maximal_cls_size = histogram.min(), histogram.max()
if runner.rank == 0:
print_log(
"empty_num: {}\tmin_cluster: {}\tmax_cluster:{}".format(
empty_cls.item(), minimal_cls_size.item(),
maximal_cls_size.item()),
f'empty_num: {empty_cls.item()}\t'
f'min_cluster: {minimal_cls_size.item()}\t'
f'max_cluster:{maximal_cls_size.item()}',
logger='root')
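# Hypothetical argument set for ODCHook, matching the parameters documented
# above (for example as a custom_hooks entry); all interval values are
# placeholders, not taken from any config in this repository.
odc_hook_cfg = dict(
    type='ODCHook',
    centroids_update_interval=10,
    deal_with_small_clusters_interval=1,
    evaluate_interval=50,
    reweight=True,
    reweight_pow=0.5)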

View File

@ -0,0 +1,261 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import (HOOKS, Fp16OptimizerHook, OptimizerHook,
allreduce_grads)
from mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version
@HOOKS.register_module()
class DistOptimizerHook(OptimizerHook):
"""Optimizer hook for distributed training.
This hook can accumulate gradients over every n iterations and freeze the
gradients of some layers for some iterations at the beginning.
Args:
update_interval (int, optional): The update interval of the weights,
set > 1 to accumulate the grad. Defaults to 1.
grad_clip (dict, optional): Dict to config the value of grad clip.
E.g., grad_clip = dict(max_norm=10). Defaults to None.
coalesce (bool, optional): Whether allreduce parameters as a whole.
Defaults to True.
bucket_size_mb (int, optional): Size of bucket, the unit is MB.
Defaults to -1.
frozen_layers_cfg (dict, optional): Dict to config frozen layers.
The key-value pair is layer name and its frozen iters. If frozen,
the layer gradient would be set to None. Defaults to dict().
"""
def __init__(self,
update_interval=1,
grad_clip=None,
coalesce=True,
bucket_size_mb=-1,
frozen_layers_cfg=dict()):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
self.update_interval = update_interval
self.frozen_layers_cfg = frozen_layers_cfg
self.initialized = False
def has_batch_norm(self, module):
if isinstance(module, _BatchNorm):
return True
for m in module.children():
if self.has_batch_norm(m):
return True
return False
def _init(self, runner):
if runner.iter % self.update_interval != 0:
runner.logger.warning(
'Resume iter number is not divisible by update_interval in '
'GradientCumulativeOptimizerHook, which means the gradient of '
'some iters is lost and the result may be influenced slightly.'
)
if self.has_batch_norm(runner.model) and self.update_interval > 1:
runner.logger.warning(
'GradientCumulativeOptimizerHook may slightly decrease '
'performance if the model has BatchNorm layers.')
residual_iters = runner.max_iters
self.divisible_iters = (
residual_iters // self.update_interval * self.update_interval)
self.remainder_iters = residual_iters - self.divisible_iters
self.initialized = True
def before_run(self, runner):
runner.optimizer.zero_grad()
def after_train_iter(self, runner):
# In some cases, MMCV's GradientCumulativeOptimizerHook will
# cause the loss_factor to be zero and we fix this bug in our
# implementation.
if not self.initialized:
self._init(runner)
if runner.iter < self.divisible_iters:
loss_factor = self.update_interval
else:
loss_factor = self.remainder_iters
runner.outputs['loss'] /= loss_factor
runner.outputs['loss'].backward()
if (self.every_n_iters(runner, self.update_interval)
or self.is_last_iter(runner)):
# cancel gradient of certain layer for n iters
# according to frozen_layers_cfg dict
for layer, iters in self.frozen_layers_cfg.items():
if runner.iter < iters:
for name, p in runner.model.module.named_parameters():
if layer in name:
p.grad = None
if self.grad_clip is not None:
grad_norm = self.clip_grads(runner.model.parameters())
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
runner.optimizer.step()
runner.optimizer.zero_grad()
if (TORCH_VERSION != 'parrots'
and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
@HOOKS.register_module()
class GradAccumFp16OptimizerHook(Fp16OptimizerHook):
"""Fp16 optimizer hook (using PyTorch's implementation).
This hook can accumulate gradients over every n iterations and freeze the
gradients of some layers for some iterations at the beginning.
If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend
to take care of the optimization procedure.
Args:
update_interval (int, optional): The update interval of the
weights, set > 1 to accumulate the grad. Defaults to 1.
frozen_layers_cfg (dict, optional): Dict to config frozen layers.
The key-value pair is layer name and its frozen iters. If
frozen, the layer gradient would be set to None.
Defaults to dict().
"""
def __init__(self,
update_interval=1,
frozen_layers_cfg=dict(),
**kwargs):
super(GradAccumFp16OptimizerHook, self).__init__(**kwargs)
self.update_interval = update_interval
self.frozen_layers_cfg = frozen_layers_cfg
def after_train_iter(self, runner):
runner.outputs['loss'] /= self.update_interval
self.loss_scaler.scale(runner.outputs['loss']).backward()
if self.every_n_iters(runner, self.update_interval):
# cancel gradient of certain layer for n iters
# according to frozen_layers_cfg dict
for layer, iters in self.frozen_layers_cfg.items():
if runner.iter < iters:
for name, p in runner.model.module.named_parameters():
if layer in name:
p.grad = None
# copy fp16 grads in the model to fp32 params in the optimizer
self.loss_scaler.unscale_(runner.optimizer)
if self.grad_clip is not None:
grad_norm = self.clip_grads(runner.model.parameters())
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
{'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
# backward and update scaler
self.loss_scaler.step(runner.optimizer)
self.loss_scaler.update(self._scale_update_param)
# save state_dict of loss_scaler
runner.meta.setdefault(
'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()
# clear grads
runner.model.zero_grad()
runner.optimizer.zero_grad()
else:
@HOOKS.register_module()
class GradAccumFp16OptimizerHook(Fp16OptimizerHook):
"""Fp16 optimizer hook (using mmcv's implementation).
This hook can accumulate gradients over every n iterations and freeze the
gradients of some layers for some iterations at the beginning.
Args:
update_interval (int, optional): The update interval of the
weights, set > 1 to accumulate the grad. Defaults to 1.
frozen_layers_cfg (dict, optional): Dict to config frozen layers.
The key-value pair is layer name and its frozen iters. If
frozen, the layer gradient would be set to None.
Defaults to dict().
"""
def __init__(self,
update_interval=1,
frozen_layers_cfg=dict(),
**kwargs):
super(GradAccumFp16OptimizerHook, self).__init__(**kwargs)
self.update_interval = update_interval
self.frozen_layers_cfg = frozen_layers_cfg
def after_train_iter(self, runner):
runner.outputs['loss'] /= self.update_interval
# scale the loss value
scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale
scaled_loss.backward()
if self.every_n_iters(runner, self.update_interval):
# cancel gradient of certain layer for n iters
# according to frozen_layers_cfg dict
for layer, iters in self.frozen_layers_cfg.items():
if runner.iter < iters:
for name, p in runner.model.module.named_parameters():
if layer in name:
p.grad = None
# copy fp16 grads in the model to fp32 params in the optimizer
fp32_weights = []
for param_group in runner.optimizer.param_groups:
fp32_weights += param_group['params']
self.copy_grads_to_fp32(runner.model, fp32_weights)
# allreduce grads
if self.distributed:
allreduce_grads(fp32_weights, self.coalesce,
self.bucket_size_mb)
has_overflow = self.loss_scaler.has_overflow(fp32_weights)
# if has overflow, skip this iteration
if not has_overflow:
# scale the gradients back
for param in fp32_weights:
if param.grad is not None:
param.grad.div_(self.loss_scaler.loss_scale)
if self.grad_clip is not None:
grad_norm = self.clip_grads(fp32_weights)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
{'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
# update fp32 params
runner.optimizer.step()
# copy fp32 params to the fp16 model
self.copy_params_to_fp16(runner.model, fp32_weights)
else:
runner.logger.warning(
'Check overflow, downscale loss scale '
f'to {self.loss_scaler.cur_scale}')
self.loss_scaler.update_scale(has_overflow)
# save state_dict of loss_scaler
runner.meta.setdefault(
'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()
# clear grads
runner.model.zero_grad()
runner.optimizer.zero_grad()
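# Hedged config sketch for the hooks above: accumulate gradients over 4
# iterations, clip them as in the docstring example, and set the gradients of
# any parameter whose name contains 'backbone' to None for the first 1000
# iterations. All numbers and the layer name are placeholders.
optimizer_config = dict(
    type='DistOptimizerHook',
    update_interval=4,
    grad_clip=dict(max_norm=10),
    frozen_layers_cfg=dict(backbone=1000))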

View File

@ -0,0 +1,37 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import HOOKS, Hook
@HOOKS.register_module()
class SimSiamHook(Hook):
"""Hook for SimSiam.
This hook is used by SimSiam to fix the learning rate of the predictor.
Args:
fix_pred_lr (bool): whether to fix the lr of predictor or not.
lr (float): the value of fixed lr.
adjust_by_epoch (bool, optional): whether to set lr by epoch or iter.
Defaults to True.
"""
def __init__(self, fix_pred_lr, lr, adjust_by_epoch=True, **kwargs):
self.fix_pred_lr = fix_pred_lr
self.lr = lr
self.adjust_by_epoch = adjust_by_epoch
def before_train_iter(self, runner):
if self.adjust_by_epoch:
return
else:
if self.fix_pred_lr:
for param_group in runner.optimizer.param_groups:
if 'fix_lr' in param_group and param_group['fix_lr']:
param_group['lr'] = self.lr
def before_train_epoch(self, runner):
"""fix lr of predictor."""
if self.fix_pred_lr:
for param_group in runner.optimizer.param_groups:
if 'fix_lr' in param_group and param_group['fix_lr']:
param_group['lr'] = self.lr
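# Hedged sketch of how SimSiamHook can be paired with a parameter group that
# carries 'fix_lr'; the regexp 'predictor' and the numbers are illustrative.
# The optimizer constructor shown later in this diff copies fix_lr=True into
# the matching param groups, which is what before_train_epoch() checks for.
optimizer = dict(
    type='SGD', lr=0.05, momentum=0.9, weight_decay=1e-4,
    paramwise_options={'predictor': dict(fix_lr=True)})
custom_hooks = [dict(type='SimSiamHook', fix_pred_lr=True, lr=0.05)]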

View File

@ -0,0 +1,81 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import torch
import torch.distributed as dist
from mmcv.runner import HOOKS, Hook
@HOOKS.register_module()
class SwAVHook(Hook):
"""Hook for SwAV.
This hook builds the queue in SwAV according to ``epoch_queue_starts``.
The queue is saved in ``runner.work_dir`` and is reloaded at the start
epoch if that folder already contains saved queues.
Args:
batch_size (int): the batch size per GPU for computing.
epoch_queue_starts (int, optional): from this epoch, starts to use the
queue. Defaults to 15.
crops_for_assign (list[int], optional): list of crops id used for
computing assignments. Defaults to [0, 1].
feat_dim (int, optional): feature dimension of output vector.
Defaults to 128.
queue_length (int, optional): length of the queue (0 for no queue).
Defaults to 0.
interval (int, optional): the interval to save the queue.
Defaults to 1.
"""
def __init__(self,
batch_size,
epoch_queue_starts=15,
crops_for_assign=[0, 1],
feat_dim=128,
queue_length=0,
interval=1,
**kwargs):
self.batch_size = batch_size * dist.get_world_size()\
if dist.is_initialized() else batch_size
self.epoch_queue_starts = epoch_queue_starts
self.crops_for_assign = crops_for_assign
self.feat_dim = feat_dim
self.queue_length = queue_length
self.interval = interval
self.queue = None
def before_run(self, runner):
if dist.is_initialized():
self.queue_path = osp.join(runner.work_dir,
'queue' + str(dist.get_rank()) + '.pth')
else:
self.queue_path = osp.join(runner.work_dir, 'queue.pth')
# build the queue
if osp.isfile(self.queue_path):
self.queue = torch.load(self.queue_path)['queue']
runner.model.module.head.queue = self.queue
# the queue needs to be divisible by the batch size
self.queue_length -= self.queue_length % self.batch_size
def before_train_epoch(self, runner):
# optionally starts a queue
if self.queue_length > 0 \
and runner.epoch >= self.epoch_queue_starts \
and self.queue is None:
self.queue = torch.zeros(
len(self.crops_for_assign),
self.queue_length // runner.world_size,
self.feat_dim,
).cuda()
# set the boolean type of use_the_queue
runner.model.module.head.queue = self.queue
runner.model.module.head.use_queue = False
def after_train_epoch(self, runner):
self.queue = runner.model.module.head.queue
if self.queue is not None and self.every_n_epochs(
runner, self.interval):
torch.save({'queue': self.queue}, self.queue_path)
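# Illustrative custom_hooks entry for SwAVHook, mirroring the arguments
# documented above; batch_size should equal the per-GPU training batch size,
# and the remaining numbers are placeholders.
custom_hooks = [
    dict(
        type='SwAVHook',
        batch_size=32,
        epoch_queue_starts=15,
        crops_for_assign=[0, 1],
        feat_dim=128,
        queue_length=3840)
]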

View File

@ -0,0 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_optimizer
from .constructor import DefaultOptimizerConstructor
from .optimizers import LARS
__all__ = ['LARS', 'build_optimizer', 'DefaultOptimizerConstructor']

View File

@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from mmcv.runner.optimizer.builder import build_optimizer_constructor
def build_optimizer(model, optimizer_cfg):
"""Build optimizer from configs.
Args:
model (:obj:`nn.Module`): The model with parameters to be optimized.
optimizer_cfg (dict): The config dict of the optimizer.
Positional fields are:
- type: class name of the optimizer.
- lr: base learning rate.
Optional fields are:
- any arguments of the corresponding optimizer type, e.g.,
weight_decay, momentum, etc.
- paramwise_options: a dict with regular expression as keys
to match parameter names and a dict containing options as
values. Options include 6 fields: lr, lr_mult, momentum,
momentum_mult, weight_decay, weight_decay_mult.
Returns:
torch.optim.Optimizer: The initialized optimizer.
Example:
>>> model = torch.nn.modules.Conv1d(1, 1, 1)
>>> paramwise_options = {
>>> '(bn|gn)(\\d+)?.(weight|bias)': dict(weight_decay_mult=0.1),
>>> '\\Ahead.': dict(lr_mult=10, momentum=0)}
>>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
>>> weight_decay=0.0001,
>>> paramwise_options=paramwise_options)
>>> optimizer = build_optimizer(model, optimizer_cfg)
"""
optimizer_cfg = copy.deepcopy(optimizer_cfg)
constructor_type = optimizer_cfg.pop('constructor',
'DefaultOptimizerConstructor')
paramwise_cfg = optimizer_cfg.pop('paramwise_options', None)
optim_constructor = build_optimizer_constructor(
dict(
type=constructor_type,
optimizer_cfg=optimizer_cfg,
paramwise_cfg=paramwise_cfg))
optimizer = optim_constructor(model)
return optimizer

View File

@ -0,0 +1,81 @@
# Copyright (c) OpenMMLab. All rights reserved.
import re
import torch.distributed as dist
from mmcv.runner.optimizer.builder import OPTIMIZER_BUILDERS, OPTIMIZERS
from mmcv.utils import build_from_cfg, print_log
@OPTIMIZER_BUILDERS.register_module(force=True)
class DefaultOptimizerConstructor:
"""Rewrote default constructor for optimizers. By default each parameter
share the same optimizer settings, and we provide an argument
``paramwise_cfg`` to specify parameter-wise settings. It is a dict and may
contain the following fields:
Args:
model (:obj:`nn.Module`): The model with parameters to be optimized.
optimizer_cfg (dict): The config dict of the optimizer.
Positional fields are
- `type`: class name of the optimizer.
Optional fields are
- any arguments of the corresponding optimizer type, e.g.,
lr, weight_decay, momentum, etc.
paramwise_cfg (dict, optional): Parameter-wise options.
Defaults to None
Example 1:
>>> model = torch.nn.modules.Conv1d(1, 1, 1)
>>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
>>> weight_decay=0.0001)
>>> paramwise_cfg = dict('bias': dict(weight_decay=0., \
lars_exclude=True))
>>> optim_builder = DefaultOptimizerConstructor(
>>> optimizer_cfg, paramwise_cfg)
>>> optimizer = optim_builder(model)
"""
def __init__(self, optimizer_cfg, paramwise_cfg=None):
if not isinstance(optimizer_cfg, dict):
raise TypeError('optimizer_cfg should be a dict, '
                f'but got {type(optimizer_cfg)}')
self.optimizer_cfg = optimizer_cfg
self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg
def __call__(self, model):
if hasattr(model, 'module'):
model = model.module
optimizer_cfg = self.optimizer_cfg.copy()
paramwise_options = self.paramwise_cfg
# if no paramwise option is specified, just use the global setting
if paramwise_options is None:
optimizer_cfg['params'] = model.parameters()
return build_from_cfg(optimizer_cfg, OPTIMIZERS)
else:
assert isinstance(paramwise_options, dict)
params = []
for name, param in model.named_parameters():
param_group = {'params': [param]}
if not param.requires_grad:
params.append(param_group)
continue
for regexp, options in paramwise_options.items():
if re.search(regexp, name):
for key, value in options.items():
if key.endswith('_mult'): # is a multiplier
key = key[:-5]
assert key in optimizer_cfg, \
f'{key} not in optimizer_cfg'
value = optimizer_cfg[key] * value
param_group[key] = value
if not dist.is_initialized() or \
dist.get_rank() == 0:
print_log(f'paramwise_options -- \
{name}: {key}={value}')
# otherwise use the global settings
params.append(param_group)
optimizer_cfg['params'] = params
return build_from_cfg(optimizer_cfg, OPTIMIZERS)
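# Worked illustration of the '_mult' handling above (assumed values): a
# parameter named 'head.fc.weight' matches the regexp '\\Ahead.' and ends up
# in a param group with lr = 0.01 * 10 = 0.1, while all other parameters keep
# the global lr of 0.01.
optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
paramwise_cfg = {'\\Ahead.': dict(lr_mult=10)}
# DefaultOptimizerConstructor(optimizer_cfg, paramwise_cfg)(model) would then
# build an SGD optimizer whose 'head.*' param groups carry lr=0.1.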

View File

@ -1,25 +1,31 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv.runner.optimizer.builder import OPTIMIZERS
from torch.optim import * # noqa: F401,F403
from torch.optim.optimizer import Optimizer, required
from torch.optim import *
@OPTIMIZERS.register_module()
class LARS(Optimizer):
r"""Implements layer-wise adaptive rate scaling for SGD.
"""Implements layer-wise adaptive rate scaling for SGD.
Args:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float): base learning rate (\gamma_0)
momentum (float, optional): momentum factor (default: 0) ("m")
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
("\beta")
dampening (float, optional): dampening for momentum (default: 0)
eta (float, optional): LARS coefficient
nesterov (bool, optional): enables Nesterov momentum (default: False)
params (iterable): Iterable of parameters to optimize or dicts defining
parameter groups.
lr (float): Base learning rate.
momentum (float, optional): Momentum factor ('m'). Defaults to 0.
weight_decay (float, optional): Weight decay ('beta', L2 penalty).
Defaults to 0.
dampening (float, optional): Dampening for momentum. Defaults to 0.
eta (float, optional): LARS coefficient. Defaults to 0.001.
nesterov (bool, optional): Enables Nesterov momentum.
Defaults to False.
eps (float, optional): A small number to avoid dividing by zero.
Defaults to 1e-8.
Based on Algorithm 1 of the following paper by You, Gitman, and Ginsburg.
Large Batch Training of Convolutional Networks:
https://arxiv.org/abs/1708.03888
`Large Batch Training of Convolutional Networks:
<https://arxiv.org/abs/1708.03888>`_.
Example:
>>> optimizer = LARS(model.parameters(), lr=0.1, momentum=0.9,
@ -33,26 +39,32 @@ class LARS(Optimizer):
params,
lr=required,
momentum=0,
dampening=0,
weight_decay=0,
dampening=0,
eta=0.001,
nesterov=False):
nesterov=False,
eps=1e-8):
if lr is not required and lr < 0.0:
raise ValueError("Invalid learning rate: {}".format(lr))
raise ValueError(f'Invalid learning rate: {lr}')
if momentum < 0.0:
raise ValueError("Invalid momentum value: {}".format(momentum))
raise ValueError(f'Invalid momentum value: {momentum}')
if weight_decay < 0.0:
raise ValueError(
"Invalid weight_decay value: {}".format(weight_decay))
raise ValueError(f'Invalid weight_decay value: {weight_decay}')
if eta < 0.0:
raise ValueError("Invalid LARS coefficient value: {}".format(eta))
raise ValueError(f'Invalid LARS coefficient value: {eta}')
defaults = dict(
lr=lr, momentum=momentum, dampening=dampening,
weight_decay=weight_decay, nesterov=nesterov, eta=eta)
lr=lr,
momentum=momentum,
dampening=dampening,
weight_decay=weight_decay,
nesterov=nesterov,
eta=eta)
if nesterov and (momentum <= 0 or dampening != 0):
raise ValueError("Nesterov momentum requires a momentum and zero dampening")
raise ValueError(
'Nesterov momentum requires a momentum and zero dampening')
self.eps = eps
super(LARS, self).__init__(params, defaults)
def __setstate__(self, state):
@ -93,9 +105,12 @@ class LARS(Optimizer):
else:
weight_norm = torch.norm(p).item()
grad_norm = torch.norm(d_p).item()
# Compute local learning rate for this layer
local_lr = eta * weight_norm / \
(grad_norm + weight_decay * weight_norm)
if weight_norm != 0 and grad_norm != 0:
# Compute local learning rate for this layer
local_lr = eta * weight_norm / \
(grad_norm + weight_decay * weight_norm + self.eps)
else:
local_lr = 1.
actual_lr = local_lr * lr
d_p = d_p.add(p, alpha=weight_decay).mul(actual_lr)
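
For reference, a small numerical sketch (not taken from this diff) of the guarded local learning rate computed above: the trust ratio falls back to 1.0 when either norm is zero and keeps eps in the denominator. The tensor shapes and hyperparameter values are arbitrary assumptions; instantiating the optimizer itself would follow the docstring example, e.g. LARS(model.parameters(), lr=0.1, momentum=0.9).

import torch

eta, weight_decay, eps, lr = 0.001, 1e-4, 1e-8, 0.1

p = torch.randn(64, 32)          # a weight tensor of one layer
d_p = torch.randn(64, 32) * 0.1  # its raw gradient

weight_norm = torch.norm(p).item()
grad_norm = torch.norm(d_p).item()

# Guarded trust ratio, as in the hunk above.
if weight_norm != 0 and grad_norm != 0:
    local_lr = eta * weight_norm / (
        grad_norm + weight_decay * weight_norm + eps)
else:
    local_lr = 1.

# The actual parameter update direction scaled by the layer-wise rate.
update = d_p.add(p, alpha=weight_decay).mul(local_lr * lr)
print(local_lr, update.norm().item())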

View File

@ -1,8 +0,0 @@
from .builder import build_hook
from .byol_hook import BYOLHook
from .deepcluster_hook import DeepClusterHook
from .odc_hook import ODCHook
from .optimizer_hook import DistOptimizerHook
from .extractor import Extractor
from .validate_hook import ValidateHook
from .registry import HOOKS

View File

@ -1,7 +0,0 @@
from openselfsup.utils import build_from_cfg
from .registry import HOOKS
def build_hook(cfg, default_args=None):
return build_from_cfg(cfg, HOOKS, default_args)
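
For reference, a minimal sketch (not taken from this diff) of the registry pattern the removed builder relied on, using mmcv's Registry and build_from_cfg, which the openselfsup helpers mirrored; DummyHook and its interval field are made-up examples.

from mmcv.utils import Registry, build_from_cfg

HOOKS = Registry('hook')


@HOOKS.register_module()
class DummyHook:
    """Illustrative hook; real hooks would subclass mmcv.runner.Hook."""

    def __init__(self, interval=1):
        self.interval = interval


cfg = dict(type='DummyHook', interval=10)
hook = build_from_cfg(cfg, HOOKS)
print(type(hook).__name__, hook.interval)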

View File

@ -1,61 +0,0 @@
import torch.nn as nn
from torch.utils.data import Dataset
from openselfsup.utils import nondist_forward_collect, dist_forward_collect
class Extractor(object):
"""Feature extractor.
Args:
dataset (Dataset | dict): A PyTorch dataset or dict that indicates
the dataset.
imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of
each GPU.
workers_per_gpu (int): How many subprocesses to use for data loading
for each GPU.
dist_mode (bool): Use distributed extraction or not. Default: False.
"""
def __init__(self,
dataset,
imgs_per_gpu,
workers_per_gpu,
dist_mode=False):
from openselfsup import datasets
if isinstance(dataset, Dataset):
self.dataset = dataset
elif isinstance(dataset, dict):
self.dataset = datasets.build_dataset(dataset)
else:
raise TypeError(
'dataset must be a Dataset object or a dict, not {}'.format(
type(dataset)))
self.data_loader = datasets.build_dataloader(
self.dataset,
imgs_per_gpu,
workers_per_gpu,
dist=dist_mode,
shuffle=False)
self.dist_mode = dist_mode
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
def _forward_func(self, runner, **x):
backbone_feat = runner.model(mode='extract', **x)
last_layer_feat = runner.model.module.neck([backbone_feat[-1]])[0]
last_layer_feat = last_layer_feat.view(last_layer_feat.size(0), -1)
return dict(feature=last_layer_feat.cpu())
def __call__(self, runner):
func = lambda **x: self._forward_func(runner, **x)
if self.dist_mode:
feats = dist_forward_collect(
func,
self.data_loader,
runner.rank,
len(self.dataset),
ret_rank=-1)['feature'] # NxD
else:
feats = nondist_forward_collect(func, self.data_loader,
len(self.dataset))['feature']
return feats
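
For reference, a torch-only sketch (not taken from this diff) of the pooling-and-flattening step that _forward_func above performs per batch: the last backbone feature map is reduced to N x D vectors. In the real code the registered neck does the reduction; AdaptiveAvgPool2d here is a stand-in, and the shapes are arbitrary assumptions.

import torch
import torch.nn as nn

# Stand-in for the last backbone stage output: N x C x H x W.
backbone_feat = torch.randn(4, 512, 7, 7)

# Global average pooling followed by flattening yields N x D features,
# matching the .view(size(0), -1) flatten in _forward_func.
avg_pool = nn.AdaptiveAvgPool2d((1, 1))
feat = avg_pool(backbone_feat)
feat = feat.view(feat.size(0), -1)
print(feat.shape)  # torch.Size([4, 512])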

View File

@ -1,31 +0,0 @@
from mmcv.runner import OptimizerHook
try:
import apex
except ImportError:
print('apex is not installed')
class DistOptimizerHook(OptimizerHook):
"""Optimizer hook for distributed training."""
def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
self.update_interval = update_interval
self.use_fp16 = use_fp16
def before_run(self, runner):
runner.optimizer.zero_grad()
def after_train_iter(self, runner):
runner.outputs['loss'] /= self.update_interval
if self.use_fp16:
with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss:
scaled_loss.backward()
else:
runner.outputs['loss'].backward()
if self.every_n_iters(runner, self.update_interval):
if self.grad_clip is not None:
self.clip_grads(runner.model.parameters())
runner.optimizer.step()
runner.optimizer.zero_grad()
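
For reference, a torch-only sketch (not taken from this diff) of the gradient-accumulation pattern DistOptimizerHook implements: scale the loss by 1/update_interval, then step and zero the optimizer every update_interval iterations. The toy model, data and update_interval value are assumptions.

import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
update_interval = 4

optimizer.zero_grad()  # mirrors before_run
for it in range(1, 13):
    x, y = torch.randn(2, 8), torch.randn(2, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    # Divide so the accumulated gradient averages over the window,
    # as the hook does with runner.outputs['loss'] /= update_interval.
    (loss / update_interval).backward()
    if it % update_interval == 0:  # every_n_iters in the hook
        optimizer.step()
        optimizer.zero_grad()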

View File

@ -1,3 +0,0 @@
from openselfsup.utils import Registry
HOOKS = Registry('hook')

View File

@ -1,86 +0,0 @@
from mmcv.runner import Hook
import torch
from torch.utils.data import Dataset
from openselfsup.utils import nondist_forward_collect, dist_forward_collect
from .registry import HOOKS
@HOOKS.register_module
class ValidateHook(Hook):
"""Validation hook.
Args:
dataset (Dataset | dict): A PyTorch dataset or dict that indicates
the dataset.
dist_mode (bool): Use distributed evaluation or not. Default: True.
initial (bool): Whether to evaluate before the training starts.
Default: True.
interval (int): Evaluation interval (by epochs). Default: 1.
**eval_kwargs: Evaluation arguments fed into the evaluate function of
the dataset.
"""
def __init__(self,
dataset,
dist_mode=True,
initial=True,
interval=1,
**eval_kwargs):
from openselfsup import datasets
if isinstance(dataset, Dataset):
self.dataset = dataset
elif isinstance(dataset, dict):
self.dataset = datasets.build_dataset(dataset)
else:
raise TypeError(
'dataset must be a Dataset object or a dict, not {}'.format(
type(dataset)))
self.data_loader = datasets.build_dataloader(
self.dataset,
eval_kwargs['imgs_per_gpu'],
eval_kwargs['workers_per_gpu'],
dist=dist_mode,
shuffle=False,
prefetch=eval_kwargs.get('prefetch', False),
img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()),
)
self.dist_mode = dist_mode
self.initial = initial
self.interval = interval
self.eval_kwargs = eval_kwargs
def before_run(self, runner):
if self.initial:
self._run_validate(runner)
def after_train_epoch(self, runner):
if not self.every_n_epochs(runner, self.interval):
return
self._run_validate(runner)
def _run_validate(self, runner):
runner.model.eval()
func = lambda **x: runner.model(mode='test', **x)
if self.dist_mode:
results = dist_forward_collect(
func, self.data_loader, runner.rank,
len(self.dataset)) # dict{key: np.ndarray}
else:
results = nondist_forward_collect(func, self.data_loader,
len(self.dataset))
if runner.rank == 0:
for name, val in results.items():
self._evaluate(runner, torch.from_numpy(val), name)
runner.model.train()
def _evaluate(self, runner, results, keyword):
eval_res = self.dataset.evaluate(
results,
keyword=keyword,
logger=runner.logger,
**self.eval_kwargs['eval_param'])
for name, val in eval_res.items():
runner.log_buffer.output[name] = val
runner.log_buffer.ready = True
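
For reference, a sketch (not taken from this diff) of the kind of config entry that would drive this hook, derived from the constructor signature above: dataset, initial, interval, plus the eval kwargs the hook indexes explicitly (imgs_per_gpu, workers_per_gpu, eval_param). The dataset placeholder and the topk values are illustrative assumptions, not configs from this repo.

# Placeholder dataset config; a real config would describe the val split.
data_val = dict(type='ClassificationDataset')

custom_hooks = [
    dict(
        type='ValidateHook',
        dataset=data_val,
        initial=True,
        interval=10,
        imgs_per_gpu=128,
        workers_per_gpu=4,
        eval_param=dict(topk=(1, 5)))
]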

View File

@ -1,15 +0,0 @@
from .backbones import * # noqa: F401,F403
from .builder import (build_backbone, build_model, build_head, build_loss)
from .byol import BYOL
from .heads import *
from .classification import Classification
from .deepcluster import DeepCluster
from .odc import ODC
from .necks import *
from .npid import NPID
from .memories import *
from .moco import MOCO
from .registry import (BACKBONES, MODELS, NECKS, MEMORIES, HEADS, LOSSES)
from .rotation_pred import RotationPred
from .relative_loc import RelativeLoc
from .simclr import SimCLR

View File

@ -1 +0,0 @@
from .resnet import ResNet, make_res_layer

Some files were not shown because too many files have changed in this diff.