# mmocr/tools/train.py

#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash

from mmocr import __version__
from mmocr.apis import init_random_seed, train_detector
from mmocr.datasets import build_dataset
from mmocr.models import build_detector
from mmocr.utils import collect_env, get_root_logger, is_2dlist


def parse_args():
    """Parse the command-line arguments of the training script.

    Besides plain parsing, this function:

    * exports ``LOCAL_RANK`` into ``os.environ`` (from ``--local_rank``) when
      the variable is not already set;
    * folds the deprecated ``--options`` into ``--cfg-options``, emitting a
      warning, and rejects the case where both are given.

    Returns:
        argparse.Namespace: The parsed arguments. ``cfg_options`` holds the
        config overrides regardless of which of the two flags was used.

    Raises:
        ValueError: If both ``--options`` and ``--cfg-options`` are given.
    """
    parser = argparse.ArgumentParser(description='Train a detector.')
    parser.add_argument('config', help='Train config file path.')
    parser.add_argument('--work-dir', help='The dir to save logs and models.')
    parser.add_argument(
        '--load-from', help='The checkpoint file to load from.')
    parser.add_argument(
        '--resume-from', help='The checkpoint file to resume from.')
    parser.add_argument(
        '--no-validate',
        action='store_true',
        help='Whether not to evaluate the checkpoint during training.')
    # --gpus and --gpu-ids are two ways of saying the same thing, so only one
    # of them may be given.
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='Number of gpus to use '
        '(only applicable to non-distributed training).')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='ids of gpus to use '
        '(only applicable to non-distributed training).')
    parser.add_argument('--seed', type=int, default=None, help='Random seed.')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='Whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='Override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecate), '
        'change to --cfg-options instead.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='Override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be of the form of either '
        'key="[a,b]" or key=a,b .The argument also allows nested list/tuple '
        'values, e.g. key="[(a,b),(c,d)]". Note that the quotation marks '
        'are necessary and that no white space is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='Options for job launcher.')
    # Accept both spellings: older ``torch.distributed.launch`` passes
    # ``--local_rank`` while PyTorch >= 2.0 passes ``--local-rank``. The first
    # option string determines the destination, so it stays
    # ``args.local_rank``.
    parser.add_argument('--local_rank', '--local-rank', type=int, default=0)

    args = parser.parse_args()
    # Only fill in LOCAL_RANK when the launcher has not exported it already.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.cfg_options:
        raise ValueError(
            '--options and --cfg-options cannot be both '
            'specified, --options is deprecated in favor of --cfg-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --cfg-options')
        args.cfg_options = args.options

    return args


def main():
    """Run a training job described by the command-line arguments.

    The steps, in order: load the config and apply CLI overrides, initialise
    the distributed environment if a launcher was requested, create the work
    directory and dump the config into it, set up the logger, seed the random
    number generators, build the model and dataset(s), and finally hand
    everything to ``train_detector``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # Enable cuDNN benchmark mode only when the config asks for it.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir priority: CLI flag > value in the config > config file name.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        config_stem = osp.splitext(osp.basename(args.config))[0]
        cfg.work_dir = osp.join('./work_dirs', config_stem)

    # Checkpoint paths given on the CLI override those in the config.
    if args.load_from is not None:
        cfg.load_from = args.load_from
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    # Explicit ids win; otherwise use the first N devices (N defaults to 1).
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    elif args.gpus is None:
        cfg.gpu_ids = range(1)
    else:
        cfg.gpu_ids = range(args.gpus)

    # The distributed env has to be ready before the logger is created,
    # because the logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
        # In distributed mode gpu_ids is re-derived from the world size.
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # Prepare the work dir and keep a copy of the effective config in it.
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

    # The logger is created before any other step so those steps can log.
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    logger = get_root_logger(
        log_file=osp.join(cfg.work_dir, f'{timestamp}.log'),
        log_level=cfg.log_level)

    # ``meta`` records environment info, config text and seed for the run.
    meta = dict()
    env_info = '\n'.join(
        [f'{key}: {value}' for key, value in collect_env().items()])
    dash_line = '-' * 60 + '\n'
    logger.info(f'Environment info:\n{dash_line}{env_info}\n{dash_line}')
    meta['env_info'] = env_info
    config_text = cfg.pretty_text
    meta['config'] = config_text
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{config_text}')

    # Seed the RNGs and record the seed in both the config and the meta.
    seed = init_random_seed(args.seed)
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # A two-stage workflow also needs the val set; it is built with the
        # training pipeline instead of its own.
        val_dataset = copy.deepcopy(cfg.data.val)
        train_data_cfg = cfg.data.train
        if train_data_cfg.get('pipeline', None) is None:
            # No top-level pipeline: borrow the one of the first sub-dataset.
            sub_datasets = train_data_cfg.datasets
            if is_2dlist(sub_datasets):
                first_dataset = sub_datasets[0][0]
            else:
                first_dataset = sub_datasets[0]
            train_pipeline = first_dataset.pipeline
        elif is_2dlist(train_data_cfg.pipeline):
            train_pipeline = train_data_cfg.pipeline[0]
        else:
            train_pipeline = train_data_cfg.pipeline

        # Concat-style val sets carry one pipeline per member dataset.
        if val_dataset['type'] in ['ConcatDataset', 'UniformConcatDataset']:
            pipeline_targets = val_dataset['datasets']
        else:
            pipeline_targets = [val_dataset]
        for target in pipeline_targets:
            target.pipeline = train_pipeline
        datasets.append(build_dataset(val_dataset))

    if cfg.checkpoint_config is not None:
        # Store the mmocr version (plus short git hash) and the class names
        # in checkpoints as meta data.
        version_tag = __version__ + get_git_hash()[:7]
        cfg.checkpoint_config.meta = dict(
            mmocr_version=version_tag, CLASSES=datasets[0].CLASSES)

    # Attach the class names to the model for visualization convenience.
    model.CLASSES = datasets[0].CLASSES

    validate = not args.no_validate
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta)


# Run the training entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Make tools executable (#209) Signed-off-by: lizz <lizz@sensetime.com> 2021-05-18 15:19:08 +08:00			`#!/usr/bin/env python`
[Enhancement] Add copyright info (#439) * add copyright info 2021-08-17 17:39:30 +08:00			`# Copyright (c) OpenMMLab. All rights reserved.`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`import argparse`
			`import copy`
			`import os`
			`import os.path as osp`
			`import time`
			`import warnings`

			`import mmcv`
			`import torch`
			`from mmcv import Config, DictAction`
Implement get_root_logger and train_detector (#4) 2021-04-08 01:24:04 +08:00			`from mmcv.runner import get_dist_info, init_dist, set_random_seed`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`from mmcv.utils import get_git_hash`

Implement get_root_logger and train_detector (#4) 2021-04-08 01:24:04 +08:00			`from mmocr import __version__`
[Fix] Always broadcast a random seed to all the processes (#600) 2021-11-18 22:26:21 +08:00			`from mmocr.apis import init_random_seed, train_detector`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`from mmocr.datasets import build_dataset`
			`from mmocr.models import build_detector`
[Fix] Support both ConcatDataset and UniformConcatDataset (#675) * support UniformConcatDataset * update * rm useless * handle 2d-list datasets 2021-12-22 20:32:02 +08:00			`from mmocr.utils import collect_env, get_root_logger, is_2dlist`
[feature]: add textdet 2021-04-03 01:03:52 +08:00

			`def parse_args():`
			`parser = argparse.ArgumentParser(description='Train a detector.')`
			`parser.add_argument('config', help='Train config file path.')`
			`parser.add_argument('--work-dir', help='The dir to save logs and models.')`
Ner task (#148) * update ner standard code format * add pytest * fix pre-commit * Annotate the dataset section * fix pre-commit for dataset * rm big files and add comments in dataset * rename configs for ner task * minor changes if metric * Note modification * fix pre-commit * detail modification * rm transform * rm magic number * fix warnings in pylint * fix pre-commit * correct help info * rename model files * rename err fixed * 428_tag * Adjust to more general pipline * update unit test rate * update * Unit test coverage over 90% and add Readme * modify details * fix precommit * update * fix pre-commit * update * update * update * update result * update readme * update baseline config * update config and small minor changes * minor changes in readme and etc. * back to original * update toy config * upload model and log * fix pytest * Modify the notes. * fix readme * Delete Chinese punctuation * add demo and fix some logic and naming problems * add To_tensor transformer for ner and load pretrained model in config * delete extra lines * split ner loss to MaskedCrossEntropyLoss and MaskedFocalLoss * update config * fix err * updata * modify noqa * update new model report * fix err in ner demo * Update ner_dataset.py * Update test_ner_dataset.py * Update ner_dataset.py * Update ner_transforms.py * rm toy config and data * add comment * add empty * fix conflict * fix precommit * fix pytest * fix pytest err * Update ner_dataset.py * change dataset name to cluener2020 * move the postprocess in metric to convertor * rm __init__ etc. * precommit * add discription in loss * add auto download * add http * update * remove some 'issert' * replace unsqueeze * update config * update doc and bert.py * update * update demo code Co-authored-by: weihuaqiang <weihuaqiang@sensetime.com> Co-authored-by: Hongbin Sun <hongbin306@gmail.com> 2021-05-18 11:33:51 +08:00			`parser.add_argument(`
			`'--load-from', help='The checkpoint file to load from.')`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`parser.add_argument(`
			`'--resume-from', help='The checkpoint file to resume from.')`
			`parser.add_argument(`
			`'--no-validate',`
			`action='store_true',`
			`help='Whether not to evaluate the checkpoint during training.')`
			`group_gpus = parser.add_mutually_exclusive_group()`
			`group_gpus.add_argument(`
			`'--gpus',`
			`type=int,`
			`help='Number of gpus to use '`
			`'(only applicable to non-distributed training).')`
			`group_gpus.add_argument(`
			`'--gpu-ids',`
			`type=int,`
			`nargs='+',`
			`help='ids of gpus to use '`
			`'(only applicable to non-distributed training).')`
			`parser.add_argument('--seed', type=int, default=None, help='Random seed.')`
			`parser.add_argument(`
			`'--deterministic',`
			`action='store_true',`
			`help='Whether to set deterministic options for CUDNN backend.')`
			`parser.add_argument(`
			`'--options',`
			`nargs='+',`
			`action=DictAction,`
			`help='Override some settings in the used config, the key-value pair '`
			`'in xxx=yyy format will be merged into config file (deprecate), '`
			`'change to --cfg-options instead.')`
			`parser.add_argument(`
			`'--cfg-options',`
			`nargs='+',`
			`action=DictAction,`
			`help='Override some settings in the used config, the key-value pair '`
			`'in xxx=yyy format will be merged into config file. If the value to '`
			`'be overwritten is a list, it should be of the form of either '`
			`'key="[a,b]" or key=a,b .The argument also allows nested list/tuple '`
			`'values, e.g. key="[(a,b),(c,d)]". Note that the quotation marks '`
			`'are necessary and that no white space is allowed.')`
			`parser.add_argument(`
			`'--launcher',`
			`choices=['none', 'pytorch', 'slurm', 'mpi'],`
			`default='none',`
			`help='Options for job launcher.')`
			`parser.add_argument('--local_rank', type=int, default=0)`

			`args = parser.parse_args()`
			`if 'LOCAL_RANK' not in os.environ:`
			`os.environ['LOCAL_RANK'] = str(args.local_rank)`

			`if args.options and args.cfg_options:`
			`raise ValueError(`
			`'--options and --cfg-options cannot be both '`
			`'specified, --options is deprecated in favor of --cfg-options')`
			`if args.options:`
			`warnings.warn('--options is deprecated in favor of --cfg-options')`
			`args.cfg_options = args.options`

			`return args`


			`def main():`
			`args = parse_args()`

			`cfg = Config.fromfile(args.config)`
			`if args.cfg_options is not None:`
			`cfg.merge_from_dict(args.cfg_options)`

			`# set cudnn_benchmark`
			`if cfg.get('cudnn_benchmark', False):`
			`torch.backends.cudnn.benchmark = True`

			`# work_dir is determined in this priority: CLI > segment in file > filename`
			`if args.work_dir is not None:`
			`# update configs according to CLI args if args.work_dir is not None`
			`cfg.work_dir = args.work_dir`
			`elif cfg.get('work_dir', None) is None:`
			`# use config filename as default work_dir if cfg.work_dir is None`
			`cfg.work_dir = osp.join('./work_dirs',`
			`osp.splitext(osp.basename(args.config))[0])`
Ner task (#148) * update ner standard code format * add pytest * fix pre-commit * Annotate the dataset section * fix pre-commit for dataset * rm big files and add comments in dataset * rename configs for ner task * minor changes if metric * Note modification * fix pre-commit * detail modification * rm transform * rm magic number * fix warnings in pylint * fix pre-commit * correct help info * rename model files * rename err fixed * 428_tag * Adjust to more general pipline * update unit test rate * update * Unit test coverage over 90% and add Readme * modify details * fix precommit * update * fix pre-commit * update * update * update * update result * update readme * update baseline config * update config and small minor changes * minor changes in readme and etc. * back to original * update toy config * upload model and log * fix pytest * Modify the notes. * fix readme * Delete Chinese punctuation * add demo and fix some logic and naming problems * add To_tensor transformer for ner and load pretrained model in config * delete extra lines * split ner loss to MaskedCrossEntropyLoss and MaskedFocalLoss * update config * fix err * updata * modify noqa * update new model report * fix err in ner demo * Update ner_dataset.py * Update test_ner_dataset.py * Update ner_dataset.py * Update ner_transforms.py * rm toy config and data * add comment * add empty * fix conflict * fix precommit * fix pytest * fix pytest err * Update ner_dataset.py * change dataset name to cluener2020 * move the postprocess in metric to convertor * rm __init__ etc. * precommit * add discription in loss * add auto download * add http * update * remove some 'issert' * replace unsqueeze * update config * update doc and bert.py * update * update demo code Co-authored-by: weihuaqiang <weihuaqiang@sensetime.com> Co-authored-by: Hongbin Sun <hongbin306@gmail.com> 2021-05-18 11:33:51 +08:00			`if args.load_from is not None:`
			`cfg.load_from = args.load_from`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`if args.resume_from is not None:`
			`cfg.resume_from = args.resume_from`
			`if args.gpu_ids is not None:`
			`cfg.gpu_ids = args.gpu_ids`
			`else:`
			`cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)`

			`# init distributed env first, since logger depends on the dist info.`
			`if args.launcher == 'none':`
			`distributed = False`
			`else:`
			`distributed = True`
			`init_dist(args.launcher, **cfg.dist_params)`
			`# re-set gpu_ids with distributed training mode`
			`_, world_size = get_dist_info()`
			`cfg.gpu_ids = range(world_size)`

			`# create work_dir`
			`mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))`
			`# dump config`
			`cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))`
			`# init the logger before other steps`
			`timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())`
			`log_file = osp.join(cfg.work_dir, f'{timestamp}.log')`
			`logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)`

			`# init the meta dict to record some important information such as`
			`# environment info and seed, which will be logged`
			`meta = dict()`
			`# log env info`
			`env_info_dict = collect_env()`
			`env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])`
			`dash_line = '-' * 60 + '\n'`
			`logger.info('Environment info:\n' + dash_line + env_info + '\n' +`
			`dash_line)`
			`meta['env_info'] = env_info`
			`meta['config'] = cfg.pretty_text`
			`# log some basic info`
			`logger.info(f'Distributed training: {distributed}')`
			`logger.info(f'Config:\n{cfg.pretty_text}')`

			`# set random seeds`
[Fix] Always broadcast a random seed to all the processes (#600) 2021-11-18 22:26:21 +08:00			`seed = init_random_seed(args.seed)`
			`logger.info(f'Set random seed to {seed}, '`
			`f'deterministic: {args.deterministic}')`
			`set_random_seed(seed, deterministic=args.deterministic)`
			`cfg.seed = seed`
			`meta['seed'] = seed`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`meta['exp_name'] = osp.basename(args.config)`

			`model = build_detector(`
			`cfg.model,`
			`train_cfg=cfg.get('train_cfg'),`
			`test_cfg=cfg.get('test_cfg'))`
Fix #282: Support init_cfg & update depreciated configs (#365) * update coco ref * init_cfg for dbnet * initcfg for mask_rcnn * textsnake init_cfg * fix dbnet * panet initcfg * psenet initcfg * fcenet initcfg * drrg initcfg * add init_cfg to detectors * update maskrcnn config file to support mmdet * fix init_cfg of fce_head * crnn initcfg * init_weights in training * nrtr initcfg * robust_scanner initcfg * sar init_cfg * seg init_cfg * tps_crnn init_cfg * sdmgr initcfg * ner init_cfg * fix textsnake * sdmgr initcfg * move "pretrained" to "init_cfg" for config files * Moduleslist update * fix seg * ner init_cfg * fix base * fix encode decode recognizer * revert dbnet config * fix crnn * fix base.py * fix robust_scanner * fix panet * fix test * remove redundant init_weights() in fcehead * clean up * relex mmdet version in workflow * Add dependency version check * Update mmocr/models/textdet/dense_heads/pse_head.py Co-authored-by: Hongbin Sun <hongbin306@gmail.com> Co-authored-by: Hongbin Sun <hongbin306@gmail.com> 2021-07-20 23:18:25 +08:00			`model.init_weights()`
[feature]: add textdet 2021-04-03 01:03:52 +08:00
			`datasets = [build_dataset(cfg.data.train)]`
			`if len(cfg.workflow) == 2:`
			`val_dataset = copy.deepcopy(cfg.data.val)`
[Fix] Support both ConcatDataset and UniformConcatDataset (#675) * support UniformConcatDataset * update * rm useless * handle 2d-list datasets 2021-12-22 20:32:02 +08:00			`if cfg.data.train.get('pipeline', None) is None:`
			`if is_2dlist(cfg.data.train.datasets):`
			`train_pipeline = cfg.data.train.datasets[0][0].pipeline`
			`else:`
			`train_pipeline = cfg.data.train.datasets[0].pipeline`
			`elif is_2dlist(cfg.data.train.pipeline):`
			`train_pipeline = cfg.data.train.pipeline[0]`
Added support for ConcatDataset with two workflows (#348) 2021-07-06 10:37:37 +02:00			`else:`
			`train_pipeline = cfg.data.train.pipeline`

[Fix] Support both ConcatDataset and UniformConcatDataset (#675) * support UniformConcatDataset * update * rm useless * handle 2d-list datasets 2021-12-22 20:32:02 +08:00			`if val_dataset['type'] in ['ConcatDataset', 'UniformConcatDataset']:`
Added support for ConcatDataset with two workflows (#348) 2021-07-06 10:37:37 +02:00			`for dataset in val_dataset['datasets']:`
			`dataset.pipeline = train_pipeline`
			`else:`
			`val_dataset.pipeline = train_pipeline`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`datasets.append(build_dataset(val_dataset))`
			`if cfg.checkpoint_config is not None:`
			`# save mmdet version, config file content and class names in`
			`# checkpoints as meta data`
			`cfg.checkpoint_config.meta = dict(`
Implement get_root_logger and train_detector (#4) 2021-04-08 01:24:04 +08:00			`mmocr_version=__version__ + get_git_hash()[:7],`
[feature]: add textdet 2021-04-03 01:03:52 +08:00			`CLASSES=datasets[0].CLASSES)`
			`# add an attribute for visualization convenience`
			`model.CLASSES = datasets[0].CLASSES`
			`train_detector(`
			`model,`
			`datasets,`
			`cfg,`
			`distributed=distributed,`
			`validate=(not args.no_validate),`
			`timestamp=timestamp,`
			`meta=meta)`


			`if __name__ == '__main__':`
			`main()`