import os

from yacs.config import CfgNode as CN


# Global config object
_C = CN()

# Example usage:
#   from core.config import regnet_cfg
regnet_cfg = _C


# ---------------------------------------------------------------------------- #
# Model options
# ---------------------------------------------------------------------------- #
_C.MODEL = CN()

# Model type
_C.MODEL.TYPE = ""

# Number of weight layers
_C.MODEL.DEPTH = 0

# Number of classes
_C.MODEL.NUM_CLASSES = 10

# Loss function (see pycls/models/loss.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"


# ---------------------------------------------------------------------------- #
# ResNet options
# ---------------------------------------------------------------------------- #
_C.RESNET = CN()

# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"

# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1

# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64

# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True


# ---------------------------------------------------------------------------- #
# AnyNet options
# ---------------------------------------------------------------------------- #
_C.ANYNET = CN()

# Stem type
_C.ANYNET.STEM_TYPE = "plain_block"

# Stem width
_C.ANYNET.STEM_W = 32

# Block type
_C.ANYNET.BLOCK_TYPE = "plain_block"

# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []

# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []

# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []

# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []

# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []

# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False

# SE ratio
_C.ANYNET.SE_R = 0.25


# ---------------------------------------------------------------------------- #
# RegNet options
# ---------------------------------------------------------------------------- #
_C.REGNET = CN()

# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"

# Stem width
_C.REGNET.STEM_W = 32

# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"

# Stride of each stage
_C.REGNET.STRIDE = 2

# Squeeze-and-Excitation (RegNetY)
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25

# Depth
_C.REGNET.DEPTH = 10

# Initial width
_C.REGNET.W0 = 32

# Slope
_C.REGNET.WA = 5.0

# Quantization
_C.REGNET.WM = 2.5

# Group width
_C.REGNET.GROUP_W = 16

# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0


# ---------------------------------------------------------------------------- #
# EfficientNet options
# ---------------------------------------------------------------------------- #
_C.EN = CN()

# Stem width
_C.EN.STEM_W = 32

# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []

# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []

# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []

# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25

# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []

# Kernel sizes for each stage
_C.EN.KERNELS = []

# Head width
_C.EN.HEAD_W = 1280

# Drop connect ratio
_C.EN.DC_RATIO = 0.0

# Dropout ratio
_C.EN.DROPOUT_RATIO = 0.0
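
# ---------------------------------------------------------------------------- #
# Example: RegNet width generation (illustrative sketch, not used by this module)
# ---------------------------------------------------------------------------- #
# The REGNET options above encode per-block widths compactly: block j gets a
# continuous width u_j = W0 + WA * j, which is then snapped to W0 * WM^k for an
# integer k and rounded to a multiple of 8 ("Designing Network Design Spaces",
# Radosavovic et al.). A minimal sketch of that mapping, assuming numpy is
# available; the helper name is ours, not part of this module's API:
def _sketch_regnet_widths(w_a, w_0, w_m, d, q=8):
    """Sketch: per-block widths from the RegNet parameterization."""
    import numpy as np

    # Continuous widths grow linearly with block index
    ws_cont = np.arange(d) * w_a + w_0
    # Snap each width to the nearest power of w_m relative to w_0
    ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
    ws = w_0 * np.power(w_m, ks)
    # Quantize widths to multiples of q; runs of equal-width blocks form stages
    ws = (np.round(ws / q) * q).astype(int).tolist()
    return ws, len(set(ws))

# With the defaults above (WA=5.0, W0=32, WM=2.5, DEPTH=10) this yields four
# blocks of width 32 followed by six of width 80, i.e. two stages.
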
# ---------------------------------------------------------------------------- #
# Batch norm options
# ---------------------------------------------------------------------------- #
_C.BN = CN()

# BN epsilon
_C.BN.EPS = 1e-5

# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1

# Precise BN stats
_C.BN.USE_PRECISE_STATS = False
_C.BN.NUM_SAMPLES_PRECISE = 1024

# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False

# Use a different weight decay for BN layers
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0


# ---------------------------------------------------------------------------- #
# Optimizer options
# ---------------------------------------------------------------------------- #
_C.OPTIM = CN()

# Base learning rate
_C.OPTIM.BASE_LR = 0.1

# Learning rate policy (select from {'cos', 'exp', 'steps'})
_C.OPTIM.LR_POLICY = "cos"

# Exponential decay factor
_C.OPTIM.GAMMA = 0.1

# Steps for 'steps' policy (in epochs)
_C.OPTIM.STEPS = []

# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1

# Maximal number of epochs
_C.OPTIM.MAX_EPOCH = 200

# Momentum
_C.OPTIM.MOMENTUM = 0.9

# Momentum dampening
_C.OPTIM.DAMPENING = 0.0

# Nesterov momentum
_C.OPTIM.NESTEROV = True

# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4

# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1

# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_EPOCHS = 0


# ---------------------------------------------------------------------------- #
# Training options
# ---------------------------------------------------------------------------- #
_C.TRAIN = CN()

# Dataset and split
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"

# Total mini-batch size
_C.TRAIN.BATCH_SIZE = 128

# Image size
_C.TRAIN.IM_SIZE = 224

# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1

# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1

# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True

# Weights to start training from
_C.TRAIN.WEIGHTS = ""


# ---------------------------------------------------------------------------- #
# Testing options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()

# Dataset and split
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"

# Total mini-batch size
_C.TEST.BATCH_SIZE = 200

# Image size
_C.TEST.IM_SIZE = 256

# Weights to use for testing
_C.TEST.WEIGHTS = ""


# ---------------------------------------------------------------------------- #
# Common train/test data loader options
# ---------------------------------------------------------------------------- #
_C.DATA_LOADER = CN()

# Number of data loader workers per training process
_C.DATA_LOADER.NUM_WORKERS = 4

# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True


# ---------------------------------------------------------------------------- #
# Memory options
# ---------------------------------------------------------------------------- #
_C.MEM = CN()

# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True
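
# ---------------------------------------------------------------------------- #
# Example: learning rate schedule (illustrative sketch, not used by this module)
# ---------------------------------------------------------------------------- #
# How the OPTIM options above typically combine into a per-epoch learning rate:
# the 'cos' policy decays BASE_LR to 0 along a half-cosine over MAX_EPOCH, and
# for the first WARMUP_EPOCHS the result is scaled linearly up from
# WARMUP_FACTOR to 1. A minimal sketch under those assumptions; the helper
# name is ours, not part of this module's API:
def _sketch_lr_at_epoch(cur_epoch):
    """Sketch: learning rate at cur_epoch under the 'cos' policy."""
    import math

    # Half-period cosine decay from BASE_LR down to 0 over MAX_EPOCH
    cos_term = 1.0 + math.cos(math.pi * cur_epoch / _C.OPTIM.MAX_EPOCH)
    lr = 0.5 * _C.OPTIM.BASE_LR * cos_term
    # Linear warmup: scale from WARMUP_FACTOR toward 1.0 over WARMUP_EPOCHS
    if cur_epoch < _C.OPTIM.WARMUP_EPOCHS:
        alpha = cur_epoch / _C.OPTIM.WARMUP_EPOCHS
        lr *= _C.OPTIM.WARMUP_FACTOR * (1.0 - alpha) + alpha
    return lr
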
# ---------------------------------------------------------------------------- #
# CUDNN options
# ---------------------------------------------------------------------------- #
_C.CUDNN = CN()

# Perform benchmarking to select the fastest CUDNN algorithms to use
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training)
_C.CUDNN.BENCHMARK = True


# ---------------------------------------------------------------------------- #
# Precise timing options
# ---------------------------------------------------------------------------- #
_C.PREC_TIME = CN()

# Perform precise timing at the start of training
_C.PREC_TIME.ENABLED = False

# Total mini-batch size
_C.PREC_TIME.BATCH_SIZE = 128

# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3

# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30


# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #

# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1

# Output directory
_C.OUT_DIR = "/tmp"

# Config destination (in OUT_DIR)
_C.CFG_DEST = "config.yaml"

# Random number generator seed
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries
_C.RNG_SEED = 1

# Log destination ('stdout' or 'file')
_C.LOG_DEST = "stdout"

# Log period in iters
_C.LOG_PERIOD = 10

# Distributed backend
_C.DIST_BACKEND = "nccl"

# Hostname and port for initializing multi-process groups
_C.HOST = "localhost"
_C.PORT = 10001

# Model weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"


def assert_and_infer_cfg(cache_urls=True):
    """Checks config values invariants."""
    assert (
        not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0
    ), "The first lr step must start at 0"
    assert _C.TRAIN.SPLIT in [
        "train",
        "val",
        "test",
    ], "Train split '{}' not supported".format(_C.TRAIN.SPLIT)
    assert (
        _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0
    ), "Train mini-batch size should be a multiple of NUM_GPUS."
    assert _C.TEST.SPLIT in [
        "train",
        "val",
        "test",
    ], "Test split '{}' not supported".format(_C.TEST.SPLIT)
    assert (
        _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0
    ), "Test mini-batch size should be a multiple of NUM_GPUS."
    assert (
        not _C.BN.USE_PRECISE_STATS or _C.NUM_GPUS == 1
    ), "Precise BN stats computation not verified for > 1 GPU"
    assert _C.LOG_DEST in [
        "stdout",
        "file",
    ], "Log destination '{}' not supported".format(_C.LOG_DEST)
    assert (
        not _C.PREC_TIME.ENABLED or _C.NUM_GPUS == 1
    ), "Precise iter time computation not verified for > 1 GPU"


def dump_cfg():
    """Dumps the config to the output directory."""
    cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
    with open(cfg_file, "w") as f:
        _C.dump(stream=f)


def load_cfg(out_dir, cfg_dest="config.yaml"):
    """Loads config from specified output directory."""
    cfg_file = os.path.join(out_dir, cfg_dest)
    _C.merge_from_file(cfg_file)
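
# Example usage (illustrative): a training script would typically merge
# overrides into the global config, validate it, and persist the result.
# The YAML path below is hypothetical; merge_from_file and merge_from_list
# are standard yacs CfgNode methods:
#
#   from core.config import regnet_cfg as cfg, assert_and_infer_cfg, dump_cfg
#
#   cfg.merge_from_file("configs/regnet_example.yaml")  # YAML overrides
#   cfg.merge_from_list(["OPTIM.BASE_LR", 0.05])        # key/value overrides
#   assert_and_infer_cfg()
#   dump_cfg()  # writes OUT_DIR/CFG_DEST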