diff --git a/configs/kie/sdmgr/README.md b/configs/kie/sdmgr/README.md index d8d7e878..e1751b00 100644 --- a/configs/kie/sdmgr/README.md +++ b/configs/kie/sdmgr/README.md @@ -21,5 +21,5 @@ | Method | Modality | Macro F1-Score | Download | | :--------------------------------------------------------------------: | :--------------: | :------------: | :-------------------------------------------------------------------------------------------------------------------------------------: | -| [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py) | Visual + Textual | 0.880 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.log.json) | -| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) | Textual | 0.871 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.log.json) | +| [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py) | Visual + Textual | 0.876 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt_20210405-16a47642.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_104508.log.json) | +| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) | Textual | 0.864 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_20210405-07bc26ad.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_141138.log.json) | diff --git a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py index e29a8ebf..1d8eb47c 100644 --- a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py +++ b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py @@ -89,3 +89,5 @@ log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] + +find_unused_parameters = True diff --git a/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py index f51ac974..9a4dd1a7 100644 --- a/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py +++ b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py @@ -89,3 +89,5 @@ log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] + +find_unused_parameters = True diff --git a/configs/textrecog/seg/README.md b/configs/textrecog/seg/README.md index 62c8a2e3..779a8a4a 100644 --- a/configs/textrecog/seg/README.md +++ b/configs/textrecog/seg/README.md @@ -25,10 +25,10 @@ A Baseline Method for Segmentation based Text Recognition. ## Results and Models -| Backbone | Neck | Head | | | Regular Text | | | Irregular Text | base_lr | batch_size/gpu | gpus | download | -| :------: | :----: | :--: | :-: | :----: | :----------: | :--: | :-: | :------------: | :-----: | :------------: | :--: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| | | | | IIIT5K | SVT | IC13 | | CT80 | -| R31-1/16 | FPNOCR | 1x | | 90.9 | 81.8 | 90.7 | | 80.9 | 1e-4 | 16 | 4 | [model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-0c50e163.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) | +|Backbone|Neck|Head|||Regular Text|||Irregular Text|download +| :-------------: | :-----: | :-----: | :------: | :-----: | :----: | :-----: | :-----: | :-----: | :-----: | +|||||IIIT5K|SVT|IC13||CT80| +|R31-1/16|FPNOCR|1x||90.9|81.8|90.7||80.9|[model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-0c50e163.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) | **Notes:** diff --git a/docs/install.md b/docs/install.md index c835ca7c..f386966a 100644 --- a/docs/install.md +++ b/docs/install.md @@ -98,7 +98,7 @@ e. Clone the mmocr repository into it. The mmdetection repo is separate from the ```shell cd .. -git clone git@gitlab.sz.sensetime.com:kuangzhh/mmocr.git +git clone https://github.com/open-mmlab/mmocr.git cd mmocr ``` @@ -142,7 +142,7 @@ export PYTHONPATH=$(pwd):$PYTHONPATH # install mmocr cd .. -git clone git@gitlab.sz.sensetime.com:kuangzhh/mmocr.git +git clone https://github.com/open-mmlab/mmocr.git cd mmocr # code/mmocr pip install -r requirements.txt @@ -233,7 +233,6 @@ mmocr │   ├── test_models │   ├── test_tools │   └── test_utils -├── tmp.txt └── tools ├── data ├── dist_test.sh @@ -248,5 +247,3 @@ mmocr ├── test.py └── train.py ``` - -The icdar2017 official annotations can be converted into the coco format that mmocr supports using `code/mmocr/tools/data_converter/icdar_converter.py`. diff --git a/tools/data/textrecog/seg_synthtext_converter.py b/tools/data/textrecog/seg_synthtext_converter.py index 64ed9701..fc4e0600 100644 --- a/tools/data/textrecog/seg_synthtext_converter.py +++ b/tools/data/textrecog/seg_synthtext_converter.py @@ -1,66 +1,64 @@ import argparse -import codecs import json import os.path as osp import cv2 -def read_json(fpath): - with codecs.open(fpath, 'r', 'utf-8') as f: - obj = json.load(f) - return obj - - -def parse_old_label(img_prefix, in_path): +def parse_old_label(data_root, in_path, img_size=False): imgid2imgname = {} imgid2anno = {} idx = 0 with open(in_path, 'r') as fr: for line in fr: line = line.strip().split() - img_full_path = osp.join(img_prefix, line[0]) + img_full_path = osp.join(data_root, line[0]) if not osp.exists(img_full_path): continue - img = cv2.imread(img_full_path) - h, w = img.shape[:2] + ann_file = osp.join(data_root, line[1]) + if not osp.exists(ann_file): + continue + img_info = {} img_info['file_name'] = line[0] - img_info['height'] = h - img_info['width'] = w + if img_size: + img = cv2.imread(img_full_path) + h, w = img.shape[:2] + img_info['height'] = h + img_info['width'] = w imgid2imgname[idx] = img_info + imgid2anno[idx] = [] - for i in range(len(line[1:]) // 8): - seg = [int(x) for x in line[(1 + i * 8):(1 + (i + 1) * 8)]] - points_x = seg[0:2:8] - points_y = seg[1:2:9] - box = [ - min(points_x), - min(points_y), - max(points_x), - max(points_y) - ] - new_anno = {} - new_anno['iscrowd'] = 0 - new_anno['category_id'] = 1 - new_anno['bbox'] = box - new_anno['segmentation'] = [seg] - imgid2anno[idx].append(new_anno) + char_annos = [] + with open(ann_file, 'r') as fr: + t = 0 + for line in fr: + line = line.strip() + if t == 0: + img_info['text'] = line + else: + char_box = [float(x) for x in line.split()] + char_text = img_info['text'][t - 1] + char_ann = dict(char_box=char_box, char_text=char_text) + char_annos.append(char_ann) + t += 1 + imgid2anno[idx] = char_annos idx += 1 return imgid2imgname, imgid2anno -def gen_line_dict_file(out_path, imgid2imgname, imgid2anno): - # import pdb; pdb.set_trace() - with codecs.open(out_path, 'w', 'utf-8') as fw: +def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False): + with open(out_path, 'w', encoding='utf-8') as fw: for key, value in imgid2imgname.items(): if key in imgid2anno: anno = imgid2anno[key] line_dict = {} line_dict['file_name'] = value['file_name'] - line_dict['height'] = value['height'] - line_dict['width'] = value['width'] + line_dict['text'] = value['text'] + if img_size: + line_dict['height'] = value['height'] + line_dict['width'] = value['width'] line_dict['annotations'] = anno line_dict_str = json.dumps(line_dict) fw.write(line_dict_str + '\n') @@ -69,8 +67,7 @@ def gen_line_dict_file(out_path, imgid2imgname, imgid2anno): def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( - '--img-prefix', - help='image prefix, to generate full image path with "image_name"') + '--data-root', help='data root for both image file and anno file') parser.add_argument( '--in-path', help='mapping file of image_name and ann_file,' @@ -84,7 +81,7 @@ def parse_args(): def main(): args = parse_args() - imgid2imgname, imgid2anno = parse_old_label(args.img_prefix, args.in_path) + imgid2imgname, imgid2anno = parse_old_label(args.data_root, args.in_path) gen_line_dict_file(args.out_path, imgid2imgname, imgid2anno) print('finish')