mmocr/tools/dataset_converters/prepare_dataset.py

51 lines
1.5 KiB
Python

# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import warnings
from mmocr.datasets.preparers import DatasetPreparer
from mmocr.utils import register_all_modules
def parse_args():
parser = argparse.ArgumentParser(
description='Preparing datasets used in MMOCR.')
parser.add_argument(
'datasets',
help='A list of the dataset names that would like to prepare.',
nargs='+')
parser.add_argument(
'--nproc', help='Number of processes to run', default=4, type=int)
parser.add_argument(
'--task',
default='textdet',
choices=['textdet', 'textrecog', 'textspotting', 'kie'],
help='Task type. Options are "textdet", "textrecog", "textspotting"'
' and "kie".')
parser.add_argument(
'--dataset-zoo-path',
default='./dataset_zoo',
help='Path to dataset zoo config files.')
args = parser.parse_args()
return args
def main():
args = parse_args()
register_all_modules()
for dataset in args.datasets:
if not osp.isdir(osp.join(args.dataset_zoo_path, dataset)):
warnings.warn(f'{dataset} is not supported yet. Please check '
'dataset zoo for supported datasets.')
continue
preparer = DatasetPreparer(
cfg_path=args.dataset_zoo_path,
dataset_name=dataset,
task=args.task,
nproc=args.nproc)
preparer()
if __name__ == '__main__':
main()