[Feature] Support K-fold cross-validation (#563)

* Support to use `indices` to specify which samples to evaluate.

* Add KFoldDataset wrapper

* Rename 'K' to 'num_splits' according to sklearn

* Add `kfold-cross-valid.py`

* Add unit tests

* Add help doc and docstring
Ma Zerun 2022-01-19 18:32:55 +08:00 committed by GitHub
parent 321ad09e6d
commit b39885d953
8 changed files with 641 additions and 5 deletions
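For orientation, here is a hedged sketch of how the new wrapper can be used directly in a config; the ImageNet paths and empty pipelines are placeholders rather than part of this commit, and `tools/kfold-cross-valid.py` applies an equivalent wrapping to `data.train` automatically.

# Illustrative config snippet: train on folds 1-4 and validate on the
# held-out fold 0 of a 5-fold split.
fold_kwargs = dict(type='KFoldDataset', fold=0, num_splits=5, seed=1)
base_dataset = dict(
    type='ImageNet',
    data_prefix='data/imagenet/train',  # placeholder path
    pipeline=[],                        # fill in the usual pipeline
)
data = dict(
    train=dict(dataset=base_dataset, **fold_kwargs),
    val=dict(dataset=base_dataset, test_mode=True, **fold_kwargs),
)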


@@ -4,7 +4,7 @@ from .builder import (DATASETS, PIPELINES, SAMPLERS, build_dataloader,
build_dataset, build_sampler)
from .cifar import CIFAR10, CIFAR100
from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
RepeatDataset)
KFoldDataset, RepeatDataset)
from .imagenet import ImageNet
from .imagenet21k import ImageNet21k
from .mnist import MNIST, FashionMNIST
@@ -17,5 +17,5 @@ __all__ = [
'VOC', 'MultiLabelDataset', 'build_dataloader', 'build_dataset',
'DistributedSampler', 'ConcatDataset', 'RepeatDataset',
'ClassBalancedDataset', 'DATASETS', 'PIPELINES', 'ImageNet21k', 'SAMPLERS',
'build_sampler', 'RepeatAugSampler'
'build_sampler', 'RepeatAugSampler', 'KFoldDataset'
]


@@ -118,6 +118,7 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
results,
metric='accuracy',
metric_options=None,
indices=None,
logger=None):
"""Evaluate the dataset.
@@ -128,6 +129,8 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
metric_options (dict, optional): Options for calculating metrics.
Allowed keys are 'topk', 'thrs' and 'average_mode'.
Defaults to None.
indices (list, optional): The indices of samples corresponding to
the results. Defaults to None.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Defaults to None.
Returns:
@@ -145,6 +148,8 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
eval_results = {}
results = np.vstack(results)
gt_labels = self.get_gt_labels()
if indices is not None:
gt_labels = gt_labels[indices]
num_imgs = len(results)
assert len(gt_labels) == num_imgs, 'dataset testing results should '\
'be of the same length as gt_labels.'
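The new `indices` argument lets a wrapper evaluate only a subset of the samples. A minimal numpy sketch with toy labels (not taken from the diff) of the subselection added above:

import numpy as np

gt_labels = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0])  # labels of the full dataset (toy data)
indices = [1, 4, 7]                                    # the samples that were actually evaluated
results = np.eye(3)[[1, 1, 0]]                         # toy prediction scores for those samples
gt_subset = gt_labels[indices]                         # only compare against these labels
accuracy = 100 * (results.argmax(axis=1) == gt_subset).mean()
print(f'accuracy: {accuracy:.1f}')                     # 66.7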


@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import platform
import random
from functools import partial
@@ -25,7 +26,7 @@ SAMPLERS = Registry('sampler')
def build_dataset(cfg, default_args=None):
from .dataset_wrappers import (ConcatDataset, RepeatDataset,
ClassBalancedDataset)
ClassBalancedDataset, KFoldDataset)
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
elif cfg['type'] == 'RepeatDataset':
@@ -34,6 +35,13 @@ def build_dataset(cfg, default_args=None):
elif cfg['type'] == 'ClassBalancedDataset':
dataset = ClassBalancedDataset(
build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
elif cfg['type'] == 'KFoldDataset':
cp_cfg = copy.deepcopy(cfg)
if cp_cfg.get('test_mode', None) is None:
cp_cfg['test_mode'] = (default_args or {}).pop('test_mode', False)
cp_cfg['dataset'] = build_dataset(cp_cfg['dataset'], default_args)
cp_cfg.pop('type')
dataset = KFoldDataset(**cp_cfg)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)


@@ -170,3 +170,56 @@ class ClassBalancedDataset(object):
def __len__(self):
return len(self.repeat_indices)
@DATASETS.register_module()
class KFoldDataset:
"""A wrapper of dataset for K-Fold cross-validation.
K-Fold cross-validation divides all the samples into k groups of nearly
equal size, called folds. k-1 folds are used for training, and the
remaining fold is used for validation.
Args:
dataset (:obj:`CustomDataset`): The dataset to be divided.
fold (int): The fold used to do validation. Defaults to 0.
num_splits (int): The number of all folds. Defaults to 5.
test_mode (bool): Whether to return the validation fold (True) or the
training folds (False). Defaults to False.
seed (int, optional): The seed used to shuffle the dataset before
splitting. If None, the dataset is not shuffled. Defaults to None.
"""
def __init__(self,
dataset,
fold=0,
num_splits=5,
test_mode=False,
seed=None):
self.dataset = dataset
self.CLASSES = dataset.CLASSES
self.test_mode = test_mode
self.num_splits = num_splits
length = len(dataset)
indices = list(range(length))
if isinstance(seed, int):
rng = np.random.default_rng(seed)
rng.shuffle(indices)
test_start = length * fold // num_splits
test_end = length * (fold + 1) // num_splits
if test_mode:
self.indices = indices[test_start:test_end]
else:
self.indices = indices[:test_start] + indices[test_end:]
def __getitem__(self, idx):
return self.dataset[self.indices[idx]]
def __len__(self):
return len(self.indices)
def evaluate(self, *args, **kwargs):
kwargs['indices'] = self.indices
return self.dataset.evaluate(*args, **kwargs)
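A hedged usage sketch of the wrapper above (the `ToyDataset` stub is invented for illustration): without a seed the validation folds are contiguous index ranges, and every train/validation pair partitions the dataset.

from mmcls.datasets import KFoldDataset


class ToyDataset:
    # A stand-in with the minimal interface KFoldDataset touches.
    CLASSES = ['cat', 'dog']

    def __len__(self):
        return 10

    def __getitem__(self, idx):
        return idx


toy = ToyDataset()
val_sets = [
    KFoldDataset(toy, fold=i, num_splits=5, test_mode=True) for i in range(5)
]
train_sets = [
    KFoldDataset(toy, fold=i, num_splits=5, test_mode=False) for i in range(5)
]
# The five validation folds are disjoint and together cover all 10 samples.
assert sum((s.indices for s in val_sets), []) == list(range(10))
# Each train/validation pair covers the whole dataset exactly once.
for train_set, val_set in zip(train_sets, val_sets):
    assert sorted(train_set.indices + val_set.indices) == list(range(10))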


@@ -28,6 +28,7 @@ class MultiLabelDataset(BaseDataset):
results,
metric='mAP',
metric_options=None,
indices=None,
logger=None,
**deprecated_kwargs):
"""Evaluate the dataset.
@@ -62,6 +63,8 @@ class MultiLabelDataset(BaseDataset):
eval_results = {}
results = np.vstack(results)
gt_labels = self.get_gt_labels()
if indices is not None:
gt_labels = gt_labels[indices]
num_imgs = len(results)
assert len(gt_labels) == num_imgs, 'dataset testing results should '\
'be of the same length as gt_labels.'


@@ -1,9 +1,14 @@
import os.path as osp
from copy import deepcopy
from unittest.mock import patch
import torch
from mmcv.utils import digit_version
from mmcls.datasets import build_dataloader
from mmcls.datasets import ImageNet, build_dataloader, build_dataset
from mmcls.datasets.dataset_wrappers import (ClassBalancedDataset,
ConcatDataset, KFoldDataset,
RepeatDataset)
class TestDataloaderBuilder():
@@ -119,3 +124,148 @@ class TestDataloaderBuilder():
expect = torch.tensor(
[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6][1::2])
assert all(torch.cat(list(iter(dataloader))) == expect)
class TestDatasetBuilder():
@classmethod
def setup_class(cls):
data_prefix = osp.join(osp.dirname(__file__), '../data/dataset')
cls.dataset_cfg = dict(
type='ImageNet',
data_prefix=data_prefix,
ann_file=osp.join(data_prefix, 'ann.txt'),
pipeline=[],
test_mode=False,
)
def test_normal_dataset(self):
# Test build
dataset = build_dataset(self.dataset_cfg)
assert isinstance(dataset, ImageNet)
assert dataset.test_mode == self.dataset_cfg['test_mode']
# Test default_args
dataset = build_dataset(self.dataset_cfg, {'test_mode': True})
assert dataset.test_mode == self.dataset_cfg['test_mode']
cp_cfg = deepcopy(self.dataset_cfg)
cp_cfg.pop('test_mode')
dataset = build_dataset(cp_cfg, {'test_mode': True})
assert dataset.test_mode
def test_concat_dataset(self):
# Test build
dataset = build_dataset([self.dataset_cfg, self.dataset_cfg])
assert isinstance(dataset, ConcatDataset)
assert dataset.datasets[0].test_mode == self.dataset_cfg['test_mode']
# Test default_args
dataset = build_dataset([self.dataset_cfg, self.dataset_cfg],
{'test_mode': True})
assert dataset.datasets[0].test_mode == self.dataset_cfg['test_mode']
cp_cfg = deepcopy(self.dataset_cfg)
cp_cfg.pop('test_mode')
dataset = build_dataset([cp_cfg, cp_cfg], {'test_mode': True})
assert dataset.datasets[0].test_mode
def test_repeat_dataset(self):
# Test build
dataset = build_dataset(
dict(type='RepeatDataset', dataset=self.dataset_cfg, times=3))
assert isinstance(dataset, RepeatDataset)
assert dataset.dataset.test_mode == self.dataset_cfg['test_mode']
# Test default_args
dataset = build_dataset(
dict(type='RepeatDataset', dataset=self.dataset_cfg, times=3),
{'test_mode': True})
assert dataset.dataset.test_mode == self.dataset_cfg['test_mode']
cp_cfg = deepcopy(self.dataset_cfg)
cp_cfg.pop('test_mode')
dataset = build_dataset(
dict(type='RepeatDataset', dataset=cp_cfg, times=3),
{'test_mode': True})
assert dataset.dataset.test_mode
def test_class_balance_dataset(self):
# Test build
dataset = build_dataset(
dict(
type='ClassBalancedDataset',
dataset=self.dataset_cfg,
oversample_thr=1.,
))
assert isinstance(dataset, ClassBalancedDataset)
assert dataset.dataset.test_mode == self.dataset_cfg['test_mode']
# Test default_args
dataset = build_dataset(
dict(
type='ClassBalancedDataset',
dataset=self.dataset_cfg,
oversample_thr=1.,
), {'test_mode': True})
assert dataset.dataset.test_mode == self.dataset_cfg['test_mode']
cp_cfg = deepcopy(self.dataset_cfg)
cp_cfg.pop('test_mode')
dataset = build_dataset(
dict(
type='ClassBalancedDataset',
dataset=cp_cfg,
oversample_thr=1.,
), {'test_mode': True})
assert dataset.dataset.test_mode
def test_kfold_dataset(self):
# Test build
dataset = build_dataset(
dict(
type='KFoldDataset',
dataset=self.dataset_cfg,
fold=0,
num_splits=5,
test_mode=False,
))
assert isinstance(dataset, KFoldDataset)
assert not dataset.test_mode
assert dataset.dataset.test_mode == self.dataset_cfg['test_mode']
# Test default_args
dataset = build_dataset(
dict(
type='KFoldDataset',
dataset=self.dataset_cfg,
fold=0,
num_splits=5,
test_mode=False,
),
default_args={
'test_mode': True,
'classes': [1, 2, 3]
})
assert not dataset.test_mode
assert dataset.dataset.test_mode == self.dataset_cfg['test_mode']
assert dataset.dataset.CLASSES == [1, 2, 3]
cp_cfg = deepcopy(self.dataset_cfg)
cp_cfg.pop('test_mode')
dataset = build_dataset(
dict(
type='KFoldDataset',
dataset=self.dataset_cfg,
fold=0,
num_splits=5,
),
default_args={
'test_mode': True,
'classes': [1, 2, 3]
})
# The test_mode in default_args will be passed to KFoldDataset
assert dataset.test_mode
assert not dataset.dataset.test_mode
# Other default_args will be passed to child dataset.
assert dataset.dataset.CLASSES == [1, 2, 3]


@@ -8,7 +8,20 @@ import numpy as np
import pytest
from mmcls.datasets import (BaseDataset, ClassBalancedDataset, ConcatDataset,
RepeatDataset)
KFoldDataset, RepeatDataset)
def mock_evaluate(results,
metric='accuracy',
metric_options=None,
indices=None,
logger=None):
return dict(
results=results,
metric=metric,
metric_options=metric_options,
indices=indices,
logger=logger)
@patch.multiple(BaseDataset, __abstractmethods__=set())
@@ -23,6 +36,8 @@ def construct_toy_multi_label_dataset(length):
dataset.data_infos = MagicMock()
dataset.data_infos.__len__.return_value = length
dataset.get_cat_ids = MagicMock(side_effect=lambda idx: cat_ids_list[idx])
dataset.evaluate = MagicMock(side_effect=mock_evaluate)
return dataset, cat_ids_list
@@ -35,6 +50,7 @@ def construct_toy_single_label_dataset(length):
dataset.data_infos = MagicMock()
dataset.data_infos.__len__.return_value = length
dataset.get_cat_ids = MagicMock(side_effect=lambda idx: cat_ids_list[idx])
dataset.evaluate = MagicMock(side_effect=mock_evaluate)
return dataset, cat_ids_list
@@ -107,3 +123,49 @@ def test_class_balanced_dataset(construct_dataset):
for idx in np.random.randint(0, len(repeat_factor_dataset), 3):
assert repeat_factor_dataset[idx] == bisect.bisect_right(
repeat_factors_cumsum, idx)
@pytest.mark.parametrize('construct_dataset', [
'construct_toy_multi_label_dataset', 'construct_toy_single_label_dataset'
])
def test_kfold_dataset(construct_dataset):
construct_toy_dataset = eval(construct_dataset)
dataset, _ = construct_toy_dataset(10)
# test without random seed
train_datasets = [
KFoldDataset(dataset, fold=i, num_splits=3, test_mode=False)
for i in range(5)
]
test_datasets = [
KFoldDataset(dataset, fold=i, num_splits=3, test_mode=True)
for i in range(5)
]
assert sum([i.indices for i in test_datasets], []) == list(range(10))
for train_set, test_set in zip(train_datasets, test_datasets):
train_samples = [train_set[i] for i in range(len(train_set))]
test_samples = [test_set[i] for i in range(len(test_set))]
assert set(train_samples + test_samples) == set(range(10))
# test with random seed
train_datasets = [
KFoldDataset(dataset, fold=i, num_splits=3, test_mode=False, seed=1)
for i in range(5)
]
test_datasets = [
KFoldDataset(dataset, fold=i, num_splits=3, test_mode=True, seed=1)
for i in range(5)
]
assert sum([i.indices for i in test_datasets], []) != list(range(10))
assert set(sum([i.indices for i in test_datasets], [])) == set(range(10))
for train_set, test_set in zip(train_datasets, test_datasets):
train_samples = [train_set[i] for i in range(len(train_set))]
test_samples = [test_set[i] for i in range(len(test_set))]
assert set(train_samples + test_samples) == set(range(10))
# test evaluate
for test_set in test_datasets:
eval_inputs = test_set.evaluate(None)
assert eval_inputs['indices'] == test_set.indices

tools/kfold-cross-valid.py (new file, 355 lines)

@@ -0,0 +1,355 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
from datetime import datetime
from pathlib import Path
import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from mmcls import __version__
from mmcls.apis import init_random_seed, set_random_seed, train_model
from mmcls.datasets import build_dataset
from mmcls.models import build_classifier
from mmcls.utils import collect_env, get_root_logger, load_json_log
TEST_METRICS = ('precision', 'recall', 'f1_score', 'support', 'mAP', 'CP',
'CR', 'CF1', 'OP', 'OR', 'OF1', 'accuracy')
prog_description = """K-Fold cross-validation.
To start a 5-fold cross-validation experiment:
python tools/kfold-cross-valid.py $CONFIG --num-splits 5
To resume a 5-fold cross-validation from an interrupted experiment:
python tools/kfold-cross-valid.py $CONFIG --num-splits 5 --resume-from work_dirs/fold2/latest.pth
To summarize a 5-fold cross-validation:
python tools/kfold-cross-valid.py $CONFIG --num-splits 5 --summary
""" # noqa: E501
def parse_args():
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=prog_description)
parser.add_argument('config', help='train config file path')
parser.add_argument(
'--num-splits', type=int, help='The number of all folds.')
parser.add_argument(
'--fold',
type=int,
help='The fold used to do validation. '
'If specified, only run the experiment on the specified fold.')
parser.add_argument(
'--summary',
action='store_true',
help='Summarize the k-fold cross-validation results.')
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume-from', help='the checkpoint file to resume from')
parser.add_argument(
'--no-validate',
action='store_true',
help='whether not to evaluate the checkpoint during training')
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument('--device', help='device used for training')
group_gpus.add_argument(
'--gpus',
type=int,
help='number of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='ids of gpus to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=None, help='random seed')
parser.add_argument(
'--deterministic',
action='store_true',
help='whether to set deterministic options for CUDNN backend.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
return args
def copy_config(old_cfg):
"""deepcopy a Config object."""
new_cfg = Config()
_cfg_dict = copy.deepcopy(old_cfg._cfg_dict)
_filename = copy.deepcopy(old_cfg._filename)
_text = copy.deepcopy(old_cfg._text)
super(Config, new_cfg).__setattr__('_cfg_dict', _cfg_dict)
super(Config, new_cfg).__setattr__('_filename', _filename)
super(Config, new_cfg).__setattr__('_text', _text)
return new_cfg
def train_single_fold(args, cfg, fold, distributed, seed):
# create the work_dir for the fold
work_dir = osp.join(cfg.work_dir, f'fold{fold}')
cfg.work_dir = work_dir
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# wrap the dataset cfg
train_dataset = dict(
type='KFoldDataset',
fold=fold,
dataset=cfg.data.train,
num_splits=args.num_splits,
seed=seed,
)
val_dataset = dict(
type='KFoldDataset',
fold=fold,
# Use the same source dataset as training.
dataset=copy.deepcopy(cfg.data.train),
num_splits=args.num_splits,
seed=seed,
test_mode=True,
)
val_dataset['dataset']['pipeline'] = cfg.data.val.pipeline
cfg.data.train = train_dataset
cfg.data.val = val_dataset
cfg.data.test = val_dataset
# dump config
stem, suffix = osp.basename(args.config).rsplit('.', 1)
cfg.dump(osp.join(cfg.work_dir, f'{stem}_fold{fold}.{suffix}'))
# init the logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
dash_line)
meta['env_info'] = env_info
# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config:\n{cfg.pretty_text}')
logger.info(
f'-------- Cross-validation: [{fold+1}/{args.num_splits}] -------- ')
# set random seeds
# Use different seed in different folds
logger.info(f'Set random seed to {seed + fold}, '
f'deterministic: {args.deterministic}')
set_random_seed(seed + fold, deterministic=args.deterministic)
cfg.seed = seed + fold
meta['seed'] = seed + fold
model = build_classifier(cfg.model)
model.init_weights()
datasets = [build_dataset(cfg.data.train)]
if len(cfg.workflow) == 2:
val_dataset = copy.deepcopy(cfg.data.val)
val_dataset.pipeline = cfg.data.train.pipeline
datasets.append(build_dataset(val_dataset))
meta.update(
dict(
mmcls_version=__version__,
config=cfg.pretty_text,
CLASSES=datasets[0].CLASSES,
kfold=dict(fold=fold, num_splits=args.num_splits)))
# add an attribute for visualization convenience
train_model(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
device='cpu' if args.device == 'cpu' else 'cuda',
meta=meta)
def summary(args, cfg):
summary = dict()
for fold in range(args.num_splits):
work_dir = Path(cfg.work_dir) / f'fold{fold}'
# Find the latest training log
log_files = list(work_dir.glob('*.log.json'))
if len(log_files) == 0:
continue
log_file = sorted(log_files)[-1]
date = datetime.fromtimestamp(log_file.lstat().st_mtime)
summary[fold] = {'date': date.strftime('%Y-%m-%d %H:%M:%S')}
# Find the latest eval log
json_log = load_json_log(log_file)
epochs = sorted(list(json_log.keys()))
eval_log = {}
def is_metric_key(key):
for metric in TEST_METRICS:
if metric in key:
return True
return False
for epoch in epochs[::-1]:
if any(is_metric_key(k) for k in json_log[epoch].keys()):
eval_log = json_log[epoch]
break
summary[fold]['epoch'] = epoch
summary[fold]['metric'] = {
k: v[0] # the value is a list with only one item.
for k, v in eval_log.items() if is_metric_key(k)
}
show_summary(args, summary)
def show_summary(args, summary_data):
try:
from rich.console import Console
from rich.table import Table
except ImportError:
raise ImportError('Please run `pip install rich` to install '
'package `rich` to draw the table.')
console = Console()
table = Table(title=f'{args.num_splits}-fold Cross-validation Summary')
table.add_column('Fold')
metrics = summary_data[0]['metric'].keys()
for metric in metrics:
table.add_column(metric)
table.add_column('Epoch')
table.add_column('Date')
for fold in range(args.num_splits):
row = [f'{fold+1}']
if fold not in summary_data:
table.add_row(*row)
continue
for metric in metrics:
metric_value = summary_data[fold]['metric'].get(metric, '')
def format_value(value):
if isinstance(value, float):
return f'{value:.2f}'
if isinstance(value, (list, tuple)):
return str([format_value(i) for i in value])
else:
return str(value)
row.append(format_value(metric_value))
row.append(str(summary_data[fold]['epoch']))
row.append(summary_data[fold]['date'])
table.add_row(*row)
console.print(table)
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
if args.summary:
summary(args, cfg)
return
# resume from the previous experiment
if args.resume_from is not None:
cfg.resume_from = args.resume_from
resume_kfold = torch.load(cfg.resume_from).get('meta',
{}).get('kfold', None)
if resume_kfold is None:
raise RuntimeError(
'No "meta" key in the checkpoint or no "kfold" in the meta dict. '
'Please check whether the resume checkpoint comes from a k-fold '
'cross-validation experiment.')
resume_fold = resume_kfold['fold']
assert args.num_splits == resume_kfold['num_splits']
else:
resume_fold = 0
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids
else:
cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
distributed = False
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
_, world_size = get_dist_info()
cfg.gpu_ids = range(world_size)
# init a unified random seed
seed = init_random_seed(args.seed)
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
if args.fold is not None:
folds = [args.fold]
else:
folds = range(resume_fold, args.num_splits)
for fold in folds:
cfg_ = copy_config(cfg)
if fold != resume_fold:
cfg_.resume_from = None
train_single_fold(args, cfg_, fold, distributed, seed)
if args.fold is None:
summary(args, cfg)
if __name__ == '__main__':
main()