mirror of
https://github.com/open-mmlab/mmocr.git
synced 2025-06-03 21:54:47 +08:00
[Fix] Fix Data Converter Issues (#955)
* fix naf mask issue; fix lv path issue
* fix path
* fix ic13, ic11 path issue; fix cocotextv2 mask issue
* fix funsd format
This commit is contained in:
parent
ae1cf42503
commit
b4678eb657
@ -92,6 +92,8 @@ def collect_cocotext_info(root_path, split, print_every=1000):
|
|||||||
w, h = math.ceil(w), math.ceil(h)
|
w, h = math.ceil(w), math.ceil(h)
|
||||||
bbox = [x, y, w, h]
|
bbox = [x, y, w, h]
|
||||||
segmentation = [max(0, int(x)) for x in ann['mask']]
|
segmentation = [max(0, int(x)) for x in ann['mask']]
|
||||||
|
if len(segmentation) < 8 or len(segmentation) % 2 != 0:
|
||||||
|
segmentation = [x, y, x + w, y, x + w, y + h, x, y + h]
|
||||||
anno = dict(
|
anno = dict(
|
||||||
iscrowd=iscrowd,
|
iscrowd=iscrowd,
|
||||||
category_id=1,
|
category_id=1,
|
||||||
|
@ -83,9 +83,10 @@ def load_img_info(files):
|
|||||||
'.')[0]
|
'.')[0]
|
||||||
# read imgs while ignoring orientations
|
# read imgs while ignoring orientations
|
||||||
img = mmcv.imread(img_file, 'unchanged')
|
img = mmcv.imread(img_file, 'unchanged')
|
||||||
|
img_file = img_file.split('data/lv/')[1]
|
||||||
|
|
||||||
img_info = dict(
|
img_info = dict(
|
||||||
file_name=osp.join(osp.basename(img_file)),
|
file_name=img_file,
|
||||||
height=img.shape[0],
|
height=img.shape[0],
|
||||||
width=img.shape[1],
|
width=img.shape[1],
|
||||||
segm_file=osp.join(osp.basename(gt_file)))
|
segm_file=osp.join(osp.basename(gt_file)))
|
||||||
|
@ -139,14 +139,15 @@ def load_json_info(gt_file, img_info):
|
|||||||
if anno['type'] == 'blank':
|
if anno['type'] == 'blank':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
xs, ys = [], []
|
xs, ys, segmentation = [], [], []
|
||||||
for p in anno['poly_points']:
|
for p in anno['poly_points']:
|
||||||
xs.append(p[0])
|
xs.append(p[0])
|
||||||
ys.append(p[1])
|
ys.append(p[1])
|
||||||
|
segmentation.append(p[0])
|
||||||
|
segmentation.append(p[1])
|
||||||
x, y = max(0, min(xs)), max(0, min(ys))
|
x, y = max(0, min(xs)), max(0, min(ys))
|
||||||
w, h = max(xs) - x, max(ys) - y
|
w, h = max(xs) - x, max(ys) - y
|
||||||
bbox = [x, y, w, h]
|
bbox = [x, y, w, h]
|
||||||
segmentation = anno['poly_points']
|
|
||||||
|
|
||||||
anno = dict(
|
anno = dict(
|
||||||
iscrowd=0,
|
iscrowd=0,
|
||||||
|
@ -140,9 +140,9 @@ def generate_ann(root_path, split, image_infos, preserve_vertical, format):
|
|||||||
|
|
||||||
dst_image_root = osp.join(root_path, 'dst_imgs', split)
|
dst_image_root = osp.join(root_path, 'dst_imgs', split)
|
||||||
if split == 'training':
|
if split == 'training':
|
||||||
dst_label_file = osp.join(root_path, 'train_label.txt')
|
dst_label_file = osp.join(root_path, f'train_label.{format}')
|
||||||
elif split == 'test':
|
elif split == 'test':
|
||||||
dst_label_file = osp.join(root_path, 'test_label.txt')
|
dst_label_file = osp.join(root_path, f'test_label.{format}')
|
||||||
os.makedirs(dst_image_root, exist_ok=True)
|
os.makedirs(dst_image_root, exist_ok=True)
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
|
@ -39,7 +39,7 @@ def convert_annotations(root_path, split, format):
|
|||||||
'r',
|
'r',
|
||||||
encoding='"utf-8-sig') as f:
|
encoding='"utf-8-sig') as f:
|
||||||
annos = f.readlines()
|
annos = f.readlines()
|
||||||
dst_image_root = osp.join(root_path, split)
|
dst_image_root = osp.join(root_path, split.lower())
|
||||||
for anno in annos:
|
for anno in annos:
|
||||||
# text may contain comma ','
|
# text may contain comma ','
|
||||||
dst_img_name, word = anno.split(', "')
|
dst_img_name, word = anno.split(', "')
|
||||||
@ -58,7 +58,7 @@ def convert_annotations(root_path, split, format):
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
list_to_file(osp.join(root_path, f'{split}_label.{format}'), lines)
|
list_to_file(osp.join(root_path, f'{split.lower()}_label.{format}'), lines)
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
|
@ -39,7 +39,7 @@ def convert_annotations(root_path, split, format):
|
|||||||
'r',
|
'r',
|
||||||
encoding='"utf-8-sig') as f:
|
encoding='"utf-8-sig') as f:
|
||||||
annos = f.readlines()
|
annos = f.readlines()
|
||||||
dst_image_root = osp.join(root_path, split)
|
dst_image_root = osp.join(root_path, split.lower())
|
||||||
for anno in annos:
|
for anno in annos:
|
||||||
# text may contain comma ','
|
# text may contain comma ','
|
||||||
dst_img_name, word = anno.split(', "')
|
dst_img_name, word = anno.split(', "')
|
||||||
@ -58,7 +58,7 @@ def convert_annotations(root_path, split, format):
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
list_to_file(osp.join(root_path, f'{split}_label.{format}'), lines)
|
list_to_file(osp.join(root_path, f'{split.lower()}_label.{format}'), lines)
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user