mirror of https://github.com/open-mmlab/mmocr.git
51 lines
1.5 KiB
Python
51 lines
1.5 KiB
Python
# Copyright (c) OpenMMLab. All rights reserved.
|
|
import argparse
|
|
import os.path as osp
|
|
import warnings
|
|
|
|
from mmocr.datasets.preparers import DatasetPreparer
|
|
from mmocr.utils import register_all_modules
|
|
|
|
|
|
def parse_args():
    """Parse the command-line options of the dataset preparation script.

    Returns:
        argparse.Namespace: Parsed options with the attributes

        - ``datasets`` (list[str]): One or more dataset names.
        - ``nproc`` (int): Number of processes to run. Defaults to 4.
        - ``task`` (str): One of "textdet", "textrecog", "textspotting"
          or "kie". Defaults to "textdet".
        - ``dataset_zoo_path`` (str): Path to dataset zoo config files.
          Defaults to "./dataset_zoo".
    """
    cli = argparse.ArgumentParser(
        description='Preparing datasets used in MMOCR.')

    # Positional: at least one dataset name is required (nargs='+').
    cli.add_argument(
        'datasets',
        nargs='+',
        help='A list of the dataset names that would like to prepare.')

    cli.add_argument(
        '--nproc', type=int, default=4, help='Number of processes to run')

    task_help = ('Task type. Options are "textdet", "textrecog", '
                 '"textspotting" and "kie".')
    cli.add_argument(
        '--task',
        choices=['textdet', 'textrecog', 'textspotting', 'kie'],
        default='textdet',
        help=task_help)

    cli.add_argument(
        '--dataset-zoo-path',
        default='./dataset_zoo',
        help='Path to dataset zoo config files.')

    return cli.parse_args()
|
|
|
|
|
|
def main():
    """Run a ``DatasetPreparer`` for every dataset named on the command line.

    A dataset is only processed when a directory with its name exists under
    ``--dataset-zoo-path``; otherwise a warning is emitted and the dataset is
    skipped, so one unknown name does not abort the remaining ones.
    """
    args = parse_args()
    register_all_modules()

    for name in args.datasets:
        zoo_entry = osp.join(args.dataset_zoo_path, name)
        if osp.isdir(zoo_entry):
            # The preparer is built per dataset and then invoked as a
            # callable to do the actual work.
            run_preparer = DatasetPreparer(
                cfg_path=args.dataset_zoo_path,
                dataset_name=name,
                task=args.task,
                nproc=args.nproc)
            run_preparer()
        else:
            warnings.warn(f'{name} is not supported yet. Please check '
                          'dataset zoo for supported datasets.')
|
|
|
|
|
|
# Script entry point: only run the preparation when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
|