[Enhance] Format non-distributed training and inference, and support CPU training. (#42)

* [Docs] Update batch size
* Fix bug in non-distributed multi-GPU training/testing
* Support CPU training
* Update CPU training and testing

parent 501a6db963
commit a8b528de3c

@@ -1,6 +1,7 @@
 ## Test a model
 
 - single GPU
+- CPU
 - single node multiple GPU
 - multiple node
 
@@ -10,6 +11,10 @@ You can use the following commands to infer a dataset.
 # single-gpu
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 
+# CPU: disable GPUs and run single-gpu testing script
+export CUDA_VISIBLE_DEVICES=-1
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
+
 # multi-gpu
 ./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [optional arguments]
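The CPU path above works because hiding all devices makes the single-GPU script fall back to CPU. A minimal sketch of the underlying check (plain PyTorch behaviour, not mmfewshot-specific code):

```python
import os

# Hide every GPU before torch initialises CUDA, mirroring
# `export CUDA_VISIBLE_DEVICES=-1` from the shell snippet above.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import torch  # noqa: E402

# With no visible devices, the single-gpu testing script ends up on CPU.
assert not torch.cuda.is_available()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'testing will run on: {device}')  # -> cpu
```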
@@ -46,6 +51,20 @@ python tools/train.py ${CONFIG_FILE} [optional arguments]
 
 If you want to specify the working directory in the command, you can add an argument `--work_dir ${YOUR_WORK_DIR}`.
 
+### Train on CPU
+
+The process of training on the CPU is consistent with single GPU training. We just need to disable GPUs before the training process.
+
+```shell
+export CUDA_VISIBLE_DEVICES=-1
+python tools/train.py ${CONFIG_FILE} [optional arguments]
+```
+
+**Note**:
+
+We do not recommend users to use CPU for training because it is too slow. We support this feature to allow users to debug on machines without GPU for convenience.
+
 ### Train with multiple GPUs
 
 ```shell
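For completeness, the same environment override can be applied when launching the training script programmatically. A hedged sketch (the config path is only a placeholder, not a file from the repo):

```python
import os
import subprocess

# Equivalent of `export CUDA_VISIBLE_DEVICES=-1` followed by
# `python tools/train.py ${CONFIG_FILE}`; GPUs stay hidden only
# for the child process, not for the current shell.
env = dict(os.environ, CUDA_VISIBLE_DEVICES='-1')
subprocess.run(
    ['python', 'tools/train.py', 'configs/example_config.py'],  # placeholder config
    env=env,
    check=True,
)
```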
@@ -32,7 +32,7 @@ assert (digit_version(mmcv_minimum_version) <= mmcv_version
     f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
 
 mmdet_minimum_version = '2.16.0'
-mmdet_maximum_version = '2.21.0'
+mmdet_maximum_version = '2.23.0'
 mmdet_version = digit_version(mmdet.__version__)
 
 
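The hunk above only widens the accepted MMDetection range to 2.23.0; the surrounding assert (partially visible as context) keeps enforcing it. A hedged sketch of that guard using the same `digit_version` helper, with an illustrative error message:

```python
import mmdet
from mmcv.utils import digit_version

mmdet_minimum_version = '2.16.0'
mmdet_maximum_version = '2.23.0'  # raised from 2.21.0 by this commit
mmdet_version = digit_version(mmdet.__version__)

# digit_version turns '2.23.0' into a comparable tuple, so the window
# check below is a plain tuple comparison.
assert (digit_version(mmdet_minimum_version) <= mmdet_version
        <= digit_version(mmdet_maximum_version)), \
    f'MMDetection {mmdet.__version__} is incompatible; please install ' \
    f'mmdet>={mmdet_minimum_version}, <={mmdet_maximum_version}.'
```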
@@ -1,4 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import warnings
 from typing import Dict, Union
 
 import torch
@@ -22,7 +23,7 @@ def train_model(model: Union[MMDataParallel, MMDistributedDataParallel],
                 distributed: bool = False,
                 validate: bool = False,
                 timestamp: str = None,
-                device: str = 'cuda',
+                device: str = None,
                 meta: Dict = None) -> None:
     logger = get_root_logger(log_level=cfg.log_level)
 
@@ -54,13 +55,14 @@ def train_model(model: Union[MMDataParallel, MMDistributedDataParallel],
             broadcast_buffers=False,
             find_unused_parameters=find_unused_parameters)
     else:
-        if device == 'cuda':
-            model = MMDataParallel(
-                model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
-        elif device == 'cpu':
+        if device == 'cpu':
+            warnings.warn(
+                'The argument `device` is deprecated. To use cpu to train, '
+                'please refers to https://mmclassification.readthedocs.io/en'
+                '/latest/getting_started.html#train-a-model')
             model = model.cpu()
         else:
             raise ValueError(F'unsupported device name {device}.')
+        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
-
     # build runner
     optimizer = build_optimizer(model, cfg.optimizer)
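Taken together, the hunks above deprecate the `device` argument and route the non-distributed case through `MMDataParallel(model, device_ids=cfg.gpu_ids)`. A condensed sketch of the resulting control flow, not the verbatim function body (the helper name `wrap_model` is invented for illustration):

```python
import warnings

import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel


def wrap_model(model, cfg, distributed=False, device=None):
    """Sketch of how the model wrapper is chosen after this change."""
    if distributed:
        # Launched via a dist launcher: one process per GPU.
        return MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
    if device == 'cpu':
        # Deprecated path kept for backward compatibility; prefer
        # hiding GPUs with CUDA_VISIBLE_DEVICES=-1 instead.
        warnings.warn('The argument `device` is deprecated; '
                      'set CUDA_VISIBLE_DEVICES=-1 to train on CPU.')
        model = model.cpu()
    # Single-GPU, or CPU with MMCV >= 1.4.4.
    return MMDataParallel(model, device_ids=cfg.gpu_ids)
```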
@@ -55,8 +55,8 @@ def train_detector(model: nn.Module,
             broadcast_buffers=False,
             find_unused_parameters=find_unused_parameters)
     else:
-        model = MMDataParallel(
-            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
+        # Please use MMCV >= 1.4.4 for CPU training!
+        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
 
     # build runner
     optimizer = build_optimizer(model, cfg.optimizer)
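The new comment points at the MMCV 1.4.4 requirement for CPU execution through `MMDataParallel`. A hedged sketch of how one could surface that requirement up front (the error message wording is illustrative):

```python
import mmcv
import torch
from mmcv.utils import digit_version

# CPU training/testing via MMDataParallel needs MMCV >= 1.4.4,
# per the comment added in the hunk above.
if not torch.cuda.is_available():
    if digit_version(mmcv.__version__) < digit_version('1.4.4'):
        raise RuntimeError(
            f'MMCV {mmcv.__version__} is too old for CPU training; '
            'please upgrade to mmcv-full >= 1.4.4.')
```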
@@ -3,6 +3,7 @@ import argparse
 import os
 import os.path as osp
 import time
+import warnings
 
 import mmcv
 import torch
@@ -64,10 +65,19 @@ def parse_args():
         help='whether to set deterministic options for CUDNN backend.')
     parser.add_argument('--local_rank', type=int, default=0)
     parser.add_argument(
-        '--device',
-        choices=['cpu', 'cuda'],
-        default='cuda',
-        help='device used for testing')
+        '--device', default=None, help='device used for testing. (Deprecated)')
+    parser.add_argument(
+        '--gpu-ids',
+        type=int,
+        nargs='+',
+        help='(Deprecated, please use --gpu-id) ids of gpus to use '
+        '(only applicable to non-distributed testing)')
+    parser.add_argument(
+        '--gpu-id',
+        type=int,
+        default=0,
+        help='id of gpu to use '
+        '(only applicable to non-distributed testing)')
     parser.add_argument(
         '--show_task_results',
         action='store_true',
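The option surface after this hunk: `--device` stays only as a deprecated flag, `--gpu-ids` is deprecated, and `--gpu-id` (default 0) becomes the supported way to pick a GPU. A stand-alone sketch reproducing just these three options:

```python
import argparse

# Only the flags touched by the hunk above; the real parser has many more.
parser = argparse.ArgumentParser(description='sketch of the updated test flags')
parser.add_argument(
    '--device', default=None, help='device used for testing. (Deprecated)')
parser.add_argument(
    '--gpu-ids',
    type=int,
    nargs='+',
    help='(Deprecated, please use --gpu-id) ids of gpus to use')
parser.add_argument(
    '--gpu-id', type=int, default=0, help='id of gpu to use')

args = parser.parse_args(['--gpu-id', '1'])
print(args.gpu_id, args.gpu_ids, args.device)  # -> 1 None None
```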
@@ -75,6 +85,15 @@ def parse_args():
     args = parser.parse_args()
     if 'LOCAL_RANK' not in os.environ:
         os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    if args.device:
+        warnings.warn(
+            '--device is deprecated. To use cpu to test, please '
+            'refers to https://mmclassification.readthedocs.io/en/latest/'
+            'getting_started.html#inference-with-pretrained-models')
+
     assert args.metrics or args.out, \
         'Please specify at least one of output path and evaluation metrics.'
     return args
@@ -96,7 +115,14 @@ def main():
         # use config filename as default work_dir if cfg.work_dir is None
         cfg.work_dir = osp.join('./work_dirs',
                                 osp.splitext(osp.basename(args.config))[0])
-
+    if args.gpu_ids is not None:
+        cfg.gpu_ids = args.gpu_ids[0:1]
+        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                      'Because we only support single GPU mode in '
+                      'non-distributed testing. Use the first GPU '
+                      'in `gpu_ids` now.')
+    else:
+        cfg.gpu_ids = [args.gpu_id]
     # init distributed env first, since logger depends on the dist info.
     if args.launcher == 'none':
         distributed = False
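The `main()` change boils down to a small fallback: a deprecated `--gpu-ids` list is truncated to its first entry, otherwise `--gpu-id` is used. A hedged, stand-alone sketch of that rule (the function name is invented):

```python
import warnings


def resolve_test_gpu_ids(gpu_ids=None, gpu_id=0):
    """Keep exactly one GPU id for non-distributed testing."""
    if gpu_ids is not None:
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`; '
                      'only the first id is used in non-distributed testing.')
        return gpu_ids[0:1]
    return [gpu_id]


print(resolve_test_gpu_ids([3, 5]))  # -> [3]
print(resolve_test_gpu_ids())        # -> [0]
```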
@@ -4,6 +4,7 @@ import copy
 import os
 import os.path as osp
 import time
+import warnings
 
 import cv2
 import mmcv
@@ -45,7 +46,13 @@ def parse_args():
         '--gpu-ids',
         type=int,
         nargs='+',
-        help='ids of gpus to use '
+        help='(Deprecated, please use --gpu-id) ids of gpus to use '
         '(only applicable to non-distributed training)')
+    group_gpus.add_argument(
+        '--gpu-id',
+        type=int,
+        default=0,
+        help='id of gpu to use '
+        '(only applicable to non-distributed training)')
     parser.add_argument('--seed', type=int, default=None, help='random seed')
     parser.add_argument(
@@ -87,10 +94,19 @@ def main():
                                 osp.splitext(osp.basename(args.config))[0])
     if args.resume_from is not None:
         cfg.resume_from = args.resume_from
+    if args.gpus is not None:
+        cfg.gpu_ids = range(1)
+        warnings.warn('`--gpus` is deprecated because we only support '
+                      'single GPU mode in non-distributed training. '
+                      'Use `gpus=1` now.')
     if args.gpu_ids is not None:
-        cfg.gpu_ids = args.gpu_ids
-    else:
-        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
+        cfg.gpu_ids = args.gpu_ids[0:1]
+        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                      'Because we only support single GPU mode in '
+                      'non-distributed training. Use the first GPU '
+                      'in `gpu_ids` now.')
+    if args.gpus is None and args.gpu_ids is None:
+        cfg.gpu_ids = [args.gpu_id]
 
     # init distributed env first, since logger depends on the dist info.
     if args.launcher == 'none':
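Training adds one more deprecated flag to the mix (`--gpus`), and the hunk above resolves the three options sequentially rather than with a single if/else chain. A hedged sketch that mirrors that order (the function name is invented):

```python
import warnings


def resolve_train_gpu_ids(gpus=None, gpu_ids=None, gpu_id=0):
    """Mirror the sequential resolution in main(): later checks win."""
    resolved = None
    if gpus is not None:
        warnings.warn('`--gpus` is deprecated because only single GPU mode '
                      'is supported in non-distributed training.')
        resolved = range(1)
    if gpu_ids is not None:
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`; '
                      'only the first id is used.')
        resolved = gpu_ids[0:1]
    if gpus is None and gpu_ids is None:
        resolved = [gpu_id]
    return resolved


print(resolve_train_gpu_ids(gpu_ids=[2, 3]))  # -> [2]
print(resolve_train_gpu_ids())                # -> [0]
```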
@@ -27,6 +27,18 @@ def parse_args():
         nargs='+',
         help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
         ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
+    parser.add_argument(
+        '--gpu-ids',
+        type=int,
+        nargs='+',
+        help='(Deprecated, please use --gpu-id) ids of gpus to use '
+        '(only applicable to non-distributed training)')
+    parser.add_argument(
+        '--gpu-id',
+        type=int,
+        default=0,
+        help='id of gpu to use '
+        '(only applicable to non-distributed testing)')
     parser.add_argument('--show', action='store_true', help='show results')
     parser.add_argument(
         '--show-dir', help='directory where painted images will be saved')
@@ -116,7 +128,14 @@ def main():
     # currently only support single images testing
     samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
     assert samples_per_gpu == 1, 'currently only support single images testing'
-
+    if args.gpu_ids is not None:
+        cfg.gpu_ids = args.gpu_ids[0:1]
+        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                      'Because we only support single GPU mode in '
+                      'non-distributed testing. Use the first GPU '
+                      'in `gpu_ids` now.')
+    else:
+        cfg.gpu_ids = [args.gpu_id]
     # init distributed env first, since logger depends on the dist info.
     if args.launcher == 'none':
         distributed = False
@@ -176,7 +195,8 @@ def main():
         shuffle=False)
 
     if not distributed:
-        model = MMDataParallel(model, device_ids=[0])
+        # Please use MMCV >= 1.4.4 for CPU testing!
+        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
         show_kwargs = dict(show_score_thr=args.show_score_thr)
         if cfg.data.get('model_init', None) is not None:
             from mmfewshot.detection.apis import (single_gpu_model_init,
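In non-distributed testing the model wrapper now honours `cfg.gpu_ids` instead of hard-coding device 0, which is also what lets CPU-only runs work with MMCV >= 1.4.4 (per the comment in the hunk above). A minimal hedged sketch of the difference:

```python
from mmcv.parallel import MMDataParallel


def wrap_for_test(model, gpu_ids):
    # Before this commit: MMDataParallel(model, device_ids=[0]) -- always GPU 0.
    # After: the id picked via --gpu-id / --gpu-ids is respected, and with
    # MMCV >= 1.4.4 the wrapper can also run when no GPU is visible.
    return MMDataParallel(model, device_ids=gpu_ids)
```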
@@ -48,8 +48,14 @@ def parse_args():
         '--gpu-ids',
         type=int,
         nargs='+',
-        help='ids of gpus to use '
+        help='(Deprecated, please use --gpu-id) ids of gpus to use '
         '(only applicable to non-distributed training)')
+    parser.add_argument(
+        '--gpu-id',
+        type=int,
+        default=0,
+        help='id of gpu to use '
+        '(only applicable to non-distributed testing)')
     parser.add_argument('--seed', type=int, default=None, help='random seed')
     parser.add_argument(
         '--deterministic',
@@ -119,15 +125,24 @@ def main():
                                 osp.splitext(osp.basename(args.config))[0])
     if args.resume_from is not None:
         cfg.resume_from = args.resume_from
+    if args.gpus is not None:
+        cfg.gpu_ids = range(1)
+        warnings.warn('`--gpus` is deprecated because we only support '
+                      'single GPU mode in non-distributed training. '
+                      'Use `gpus=1` now.')
     if args.gpu_ids is not None:
-        cfg.gpu_ids = args.gpu_ids
-    else:
-        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
+        cfg.gpu_ids = args.gpu_ids[0:1]
+        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                      'Because we only support single GPU mode in '
+                      'non-distributed training. Use the first GPU '
+                      'in `gpu_ids` now.')
+    if args.gpus is None and args.gpu_ids is None:
+        cfg.gpu_ids = [args.gpu_id]
 
     # init distributed env first, since logger depends on the dist info.
     if args.launcher == 'none':
         distributed = False
-        rank, world_size = get_dist_info()
+        rank = 0
     else:
         distributed = True
         init_dist(args.launcher, **cfg.dist_params)
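The last hunk also stops querying `get_dist_info()` in the non-distributed branch and simply fixes `rank = 0`. A hedged sketch of why the two are equivalent when no launcher is used (this relies on MMCV's documented fallback, stated here as an assumption):

```python
from mmcv.runner import get_dist_info

# Without init_dist(), no process group exists, so get_dist_info()
# falls back to rank 0 and world size 1 -- hence `rank = 0` is enough.
rank, world_size = get_dist_info()
assert (rank, world_size) == (0, 1)
```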