mirror of https://github.com/open-mmlab/mmocr.git
90 lines
2.6 KiB
Python
90 lines
2.6 KiB
Python
# Copyright (c) OpenMMLab. All rights reserved.
|
|
import argparse
|
|
import json
|
|
import os.path as osp
|
|
|
|
import cv2
|
|
|
|
from mmocr.utils import list_from_file, list_to_file
|
|
|
|
|
|
def parse_old_label(data_root, in_path, img_size=False):
    """Parse the old-style character-level labels into per-image dicts.

    Every line of ``in_path`` is expected to read ``image_name ann_file``,
    both relative to ``data_root``. The first line of an annotation file is
    taken as the text of the image; each following line holds the
    whitespace-separated numbers of one character box, and the ``k``-th box
    line is paired with the ``k``-th character of the text.

    A sample is silently skipped when its mapping line has fewer than two
    fields, when the image or the annotation file does not exist, when the
    annotation file yields no lines, or (only if ``img_size`` is set) when
    OpenCV cannot decode the image.

    Args:
        data_root (str): Directory that image and annotation paths are
            joined onto.
        in_path (str): Mapping file listing ``image_name ann_file`` pairs.
        img_size (bool): Whether to read each image and record its
            ``height`` and ``width``.

    Returns:
        tuple(dict, dict): ``imgid2imgname`` maps a running index to a dict
        with ``file_name``, ``text`` and, if requested, ``height`` and
        ``width``. ``imgid2anno`` maps the same index to a list of
        ``dict(char_box=..., char_text=...)`` entries.

    Raises:
        IndexError: If an annotation file has more box lines than the text
            has characters.
        ValueError: If a box line contains a token that is not a number.
    """
    imgid2imgname = {}
    imgid2anno = {}
    idx = 0
    for line in list_from_file(in_path):
        fields = line.strip().split()
        # A blank or truncated line cannot name both files; skip it rather
        # than fail on the missing field.
        if len(fields) < 2:
            continue
        img_name, ann_name = fields[0], fields[1]

        img_full_path = osp.join(data_root, img_name)
        if not osp.exists(img_full_path):
            continue
        ann_file = osp.join(data_root, ann_name)
        if not osp.exists(ann_file):
            continue

        ann_lines = [raw.strip() for raw in list_from_file(ann_file)]
        # Without a first line there is no text to attach to the image.
        if not ann_lines:
            continue

        img_info = {'file_name': img_name}
        if img_size:
            img = cv2.imread(img_full_path)
            # cv2.imread signals an unreadable image by returning None
            # instead of raising.
            if img is None:
                continue
            h, w = img.shape[:2]
            img_info['height'] = h
            img_info['width'] = w

        text = ann_lines[0]
        img_info['text'] = text

        char_annos = []
        for char_idx, box_line in enumerate(ann_lines[1:]):
            char_box = [float(x) for x in box_line.split()]
            char_annos.append(
                dict(char_box=char_box, char_text=text[char_idx]))

        imgid2imgname[idx] = img_info
        imgid2anno[idx] = char_annos
        idx += 1

    return imgid2imgname, imgid2anno
|
|
|
|
|
|
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False):
    """Serialize the parsed labels as one JSON object per output line.

    Args:
        out_path (str): Destination passed to ``list_to_file``.
        imgid2imgname (dict): Maps an image id to a dict holding
            ``file_name`` and ``text`` (plus ``height`` and ``width`` when
            ``img_size`` is set).
        imgid2anno (dict): Maps an image id to its annotation list. Images
            whose id is absent from this dict are left out of the output.
        img_size (bool): Whether to copy ``height`` and ``width`` into each
            record.
    """
    json_lines = []
    for img_id, img_info in imgid2imgname.items():
        # Only images that also have annotations are written out.
        if img_id not in imgid2anno:
            continue
        char_annos = imgid2anno[img_id]

        record = {
            'file_name': img_info['file_name'],
            'text': img_info['text'],
        }
        if img_size:
            record['height'] = img_info['height']
            record['width'] = img_info['width']
        record['annotations'] = char_annos

        json_lines.append(json.dumps(record))

    list_to_file(out_path, json_lines)
|
|
|
|
|
|
def parse_args():
    """Parse the command-line options of the converter.

    All three options are mandatory. Without ``required=True`` an omitted
    option would be handed on as ``None``; marking them required makes
    argparse report the missing flag and exit instead.

    Returns:
        argparse.Namespace: Holds ``data_root``, ``in_path`` and
        ``out_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data-root',
        required=True,
        help='data root for both image file and anno file')
    parser.add_argument(
        '--in-path',
        required=True,
        help='mapping file of image_name and ann_file,'
        ' "image_name ann_file" in each line')
    parser.add_argument(
        '--out-path',
        required=True,
        help='output txt path with line-json format')

    args = parser.parse_args()
    return args
|
|
|
|
|
|
def main():
    """Convert the old-style labels named on the command line.

    Reads the CLI options, parses the mapping file under the given data
    root, writes the line-json output file and prints ``finish``.
    """
    cli_args = parse_args()
    img_infos, char_annos = parse_old_label(
        cli_args.data_root, cli_args.in_path)
    gen_line_dict_file(cli_args.out_path, img_infos, char_annos)
    print('finish')
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
|