fix #21: update sdmgr readme and config

pull/2/head
Hongbin Sun 2021-04-05 15:50:28 +08:00
parent 9af0bed144
commit 3727cab712
6 changed files with 46 additions and 48 deletions

View File

@ -21,5 +21,5 @@
| Method | Modality | Macro F1-Score | Download |
| :--------------------------------------------------------------------: | :--------------: | :------------: | :-------------------------------------------------------------------------------------------------------------------------------------: |
| [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py) | Visual + Textual | 0.880 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.log.json) |
| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) | Textual | 0.871 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.log.json) |
| [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py) | Visual + Textual | 0.876 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt_20210405-16a47642.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_104508.log.json) |
| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) | Textual | 0.864 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_20210405-07bc26ad.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_141138.log.json) |

View File

@ -89,3 +89,5 @@ log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
find_unused_parameters = True

View File

@ -89,3 +89,5 @@ log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
find_unused_parameters = True

View File

@ -25,10 +25,10 @@ A Baseline Method for Segmentation based Text Recognition.
## Results and Models
| Backbone | Neck | Head | | | Regular Text | | | Irregular Text | base_lr | batch_size/gpu | gpus | download |
| :------: | :----: | :--: | :-: | :----: | :----------: | :--: | :-: | :------------: | :-----: | :------------: | :--: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| | | | | IIIT5K | SVT | IC13 | | CT80 |
| R31-1/16 | FPNOCR | 1x | | 90.9 | 81.8 | 90.7 | | 80.9 | 1e-4 | 16 | 4 | [model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-0c50e163.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) |
|Backbone|Neck|Head|||Regular Text|||Irregular Text|download|
| :-------------: | :-----: | :-----: | :------: | :-----: | :----: | :-----: | :-----: | :-----: | :-----: |
|||||IIIT5K|SVT|IC13||CT80|
|R31-1/16|FPNOCR|1x||90.9|81.8|90.7||80.9|[model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-0c50e163.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) |
**Notes:**

View File

@ -98,7 +98,7 @@ e. Clone the mmocr repository into it. The mmdetection repo is separate from the
```shell
cd ..
git clone git@gitlab.sz.sensetime.com:kuangzhh/mmocr.git
git clone https://github.com/open-mmlab/mmocr.git
cd mmocr
```
@ -142,7 +142,7 @@ export PYTHONPATH=$(pwd):$PYTHONPATH
# install mmocr
cd ..
git clone git@gitlab.sz.sensetime.com:kuangzhh/mmocr.git
git clone https://github.com/open-mmlab/mmocr.git
cd mmocr # code/mmocr
pip install -r requirements.txt
@ -233,7 +233,6 @@ mmocr
│   ├── test_models
│   ├── test_tools
│   └── test_utils
├── tmp.txt
└── tools
├── data
├── dist_test.sh
@ -248,5 +247,3 @@ mmocr
├── test.py
└── train.py
```
The official ICDAR 2017 annotations can be converted into the COCO format that MMOCR supports using `code/mmocr/tools/data_converter/icdar_converter.py`.

View File

@ -1,66 +1,64 @@
import argparse
import codecs
import json
import os.path as osp
import cv2
def read_json(fpath):
    """Load and return the JSON document stored at ``fpath``.

    Args:
        fpath (str): Path of a UTF-8 encoded JSON file.

    Returns:
        The object produced by ``json.load`` for the file's content.
    """
    # The built-in open() decodes UTF-8 directly, so the legacy
    # codecs.open() wrapper is not needed here.
    with open(fpath, 'r', encoding='utf-8') as f:
        return json.load(f)
def parse_old_label(img_prefix, in_path):
def parse_old_label(data_root, in_path, img_size=False):
imgid2imgname = {}
imgid2anno = {}
idx = 0
with open(in_path, 'r') as fr:
for line in fr:
line = line.strip().split()
img_full_path = osp.join(img_prefix, line[0])
img_full_path = osp.join(data_root, line[0])
if not osp.exists(img_full_path):
continue
img = cv2.imread(img_full_path)
h, w = img.shape[:2]
ann_file = osp.join(data_root, line[1])
if not osp.exists(ann_file):
continue
img_info = {}
img_info['file_name'] = line[0]
img_info['height'] = h
img_info['width'] = w
if img_size:
img = cv2.imread(img_full_path)
h, w = img.shape[:2]
img_info['height'] = h
img_info['width'] = w
imgid2imgname[idx] = img_info
imgid2anno[idx] = []
for i in range(len(line[1:]) // 8):
seg = [int(x) for x in line[(1 + i * 8):(1 + (i + 1) * 8)]]
points_x = seg[0:2:8]
points_y = seg[1:2:9]
box = [
min(points_x),
min(points_y),
max(points_x),
max(points_y)
]
new_anno = {}
new_anno['iscrowd'] = 0
new_anno['category_id'] = 1
new_anno['bbox'] = box
new_anno['segmentation'] = [seg]
imgid2anno[idx].append(new_anno)
char_annos = []
with open(ann_file, 'r') as fr:
t = 0
for line in fr:
line = line.strip()
if t == 0:
img_info['text'] = line
else:
char_box = [float(x) for x in line.split()]
char_text = img_info['text'][t - 1]
char_ann = dict(char_box=char_box, char_text=char_text)
char_annos.append(char_ann)
t += 1
imgid2anno[idx] = char_annos
idx += 1
return imgid2imgname, imgid2anno
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
# import pdb; pdb.set_trace()
with codecs.open(out_path, 'w', 'utf-8') as fw:
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False):
with open(out_path, 'w', encoding='utf-8') as fw:
for key, value in imgid2imgname.items():
if key in imgid2anno:
anno = imgid2anno[key]
line_dict = {}
line_dict['file_name'] = value['file_name']
line_dict['height'] = value['height']
line_dict['width'] = value['width']
line_dict['text'] = value['text']
if img_size:
line_dict['height'] = value['height']
line_dict['width'] = value['width']
line_dict['annotations'] = anno
line_dict_str = json.dumps(line_dict)
fw.write(line_dict_str + '\n')
@ -69,8 +67,7 @@ def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--img-prefix',
help='image prefix, to generate full image path with "image_name"')
'--data-root', help='data root for both image file and anno file')
parser.add_argument(
'--in-path',
help='mapping file of image_name and ann_file,'
@ -84,7 +81,7 @@ def parse_args():
def main():
args = parse_args()
imgid2imgname, imgid2anno = parse_old_label(args.img_prefix, args.in_path)
imgid2imgname, imgid2anno = parse_old_label(args.data_root, args.in_path)
gen_line_dict_file(args.out_path, imgid2imgname, imgid2anno)
print('finish')