mirror of
https://github.com/open-mmlab/mmocr.git
synced 2025-06-03 21:54:47 +08:00
[TODO] update recog data_migrator
This commit is contained in:
parent
23e1f2432a
commit
ee1212a5cd
@ -3,7 +3,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from mmocr.datasets.utils.loader import AnnFileLoader
|
from mmocr.datasets import RecogLMDBDataset
|
||||||
from mmocr.utils import StringStrip, dump_ocr_data, recog_anno_to_imginfo
|
from mmocr.utils import StringStrip, dump_ocr_data, recog_anno_to_imginfo
|
||||||
|
|
||||||
|
|
||||||
@ -14,7 +14,8 @@ def parse_legacy_data(in_path: str,
|
|||||||
Args:
|
Args:
|
||||||
in_path (str): Path to annotation file.
|
in_path (str): Path to annotation file.
|
||||||
format (str): Annotation format. Choices are 'txt', 'json' and 'lmdb'.
|
format (str): Annotation format. Choices are 'txt', 'json' and 'lmdb'.
|
||||||
|
For 'lmdb' format, the lmdb file should only contain labels. For
|
||||||
|
lmdb files with labels and images, the conversion is unnecessary.
|
||||||
Returns:
|
Returns:
|
||||||
tuple(list[str], list[str]): File paths and labels.
|
tuple(list[str], list[str]): File paths and labels.
|
||||||
"""
|
"""
|
||||||
@ -22,15 +23,12 @@ def parse_legacy_data(in_path: str,
|
|||||||
labels = []
|
labels = []
|
||||||
strip_cls = StringStrip()
|
strip_cls = StringStrip()
|
||||||
if format == 'lmdb':
|
if format == 'lmdb':
|
||||||
# TODO: Backend might be deprecated
|
dataset = RecogLMDBDataset(
|
||||||
loader = AnnFileLoader(
|
|
||||||
in_path,
|
in_path,
|
||||||
parser=dict(type='LineJsonParser', keys=['filename', 'text']),
|
parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']))
|
||||||
file_format='lmdb')
|
for data_info in dataset:
|
||||||
num = len(loader)
|
file_path = data_info['img_path']
|
||||||
for i in range(num):
|
label = data_info['instances'][0]['text']
|
||||||
line_json = loader[i]
|
|
||||||
file_path, label = line_json['filename'], line_json['text']
|
|
||||||
file_path = strip_cls(file_path)
|
file_path = strip_cls(file_path)
|
||||||
label = strip_cls(label)
|
label = strip_cls(label)
|
||||||
# MJ's file_path starts with './'
|
# MJ's file_path starts with './'
|
||||||
@ -83,6 +81,8 @@ def parse_args():
|
|||||||
default='txt',
|
default='txt',
|
||||||
help='Legacy data format')
|
help='Legacy data format')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
if args.out_path.split('.')[-1] != 'json':
|
||||||
|
raise ValueError('The output path must be a json file.')
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user