fix #21: update sdmgr readme and config

2021-04-05 15:50:28 +08:00 · 2021-04-05 15:50:28 +08:00 · 3727cab712
parent 9af0bed144
commit 3727cab712
6 changed files with 46 additions and 48 deletions
--- a/configs/kie/sdmgr/README.md
+++ b/configs/kie/sdmgr/README.md
@ -21,5 +21,5 @@

 |                                 Method                                 |     Modality     | Macro F1-Score |                                                                Download                                                                 |
 | :--------------------------------------------------------------------: | :--------------: | :------------: | :-------------------------------------------------------------------------------------------------------------------------------------: |
-|   [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py)   | Visual + Textual |     0.880      | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.log.json) |
-| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) |     Textual      |     0.871      | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/todo.log.json) |
+|   [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py)   | Visual + Textual |     0.876      | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt_20210405-16a47642.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_104508.log.json) |
+| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) |     Textual      |     0.864      | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_20210405-07bc26ad.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210405_141138.log.json) |
--- a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py
+++ b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py
@ -89,3 +89,5 @@ log_level = 'INFO'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
+
+find_unused_parameters = True
--- a/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py
+++ b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py
@ -89,3 +89,5 @@ log_level = 'INFO'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
+
+find_unused_parameters = True
--- a/configs/textrecog/seg/README.md
+++ b/configs/textrecog/seg/README.md
@ -25,10 +25,10 @@ A Baseline Method for Segmentation based Text Recognition.

 ## Results and Models

-| Backbone |  Neck  | Head |     |        | Regular Text |      |     | Irregular Text | base_lr | batch_size/gpu | gpus |                                                                                           download                                                                                           |
-| :------: | :----: | :--: | :-: | :----: | :----------: | :--: | :-: | :------------: | :-----: | :------------: | :--: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|          |        |      |     | IIIT5K |     SVT      | IC13 |     |      CT80      |
-| R31-1/16 | FPNOCR |  1x  |     |  90.9  |     81.8     | 90.7 |     |      80.9      |  1e-4   |       16       |  4   | [model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-0c50e163.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) |
+|Backbone|Neck|Head|||Regular Text|||Irregular Text|download|
+| :-------------: | :-----: | :-----: | :------: | :-----: | :----: | :-----: | :-----: | :-----: | :-----: |
+|||||IIIT5K|SVT|IC13||CT80|
+|R31-1/16|FPNOCR|1x||90.9|81.8|90.7||80.9|[model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-0c50e163.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) |

 **Notes:**

--- a/docs/install.md
+++ b/docs/install.md
@ -98,7 +98,7 @@ e. Clone the mmocr repository into it. The mmdetection repo is separate from the

 ```shell
 cd ..
-git clone git@gitlab.sz.sensetime.com:kuangzhh/mmocr.git
+git clone https://github.com/open-mmlab/mmocr.git
 cd mmocr
 ```

@ -142,7 +142,7 @@ export PYTHONPATH=$(pwd):$PYTHONPATH

 # install mmocr
 cd ..
-git clone git@gitlab.sz.sensetime.com:kuangzhh/mmocr.git
+git clone https://github.com/open-mmlab/mmocr.git
 cd mmocr # code/mmocr

 pip install -r requirements.txt
@ -233,7 +233,6 @@ mmocr
 │   ├── test_models
 │   ├── test_tools
 │   └── test_utils
-├── tmp.txt
 └── tools
    ├── data
    ├── dist_test.sh
@ -248,5 +247,3 @@ mmocr
    ├── test.py
    └── train.py
 ```
-
-The icdar2017 official annotations can be converted into the coco format that mmocr supports using `code/mmocr/tools/data_converter/icdar_converter.py`.
--- a/tools/data/textrecog/seg_synthtext_converter.py
+++ b/tools/data/textrecog/seg_synthtext_converter.py
@ -1,66 +1,64 @@
 import argparse
-import codecs
 import json
 import os.path as osp

 import cv2


-def read_json(fpath):
-    with codecs.open(fpath, 'r', 'utf-8') as f:
-        obj = json.load(f)
-    return obj
-
-
-def parse_old_label(img_prefix, in_path):
+def parse_old_label(data_root, in_path, img_size=False):
    imgid2imgname = {}
    imgid2anno = {}
    idx = 0
    with open(in_path, 'r') as fr:
        for line in fr:
            line = line.strip().split()
-            img_full_path = osp.join(img_prefix, line[0])
+            img_full_path = osp.join(data_root, line[0])
            if not osp.exists(img_full_path):
                continue
-            img = cv2.imread(img_full_path)
-            h, w = img.shape[:2]
+            ann_file = osp.join(data_root, line[1])
+            if not osp.exists(ann_file):
+                continue
+
            img_info = {}
            img_info['file_name'] = line[0]
-            img_info['height'] = h
-            img_info['width'] = w
+            if img_size:
+                img = cv2.imread(img_full_path)
+                h, w = img.shape[:2]
+                img_info['height'] = h
+                img_info['width'] = w
            imgid2imgname[idx] = img_info
+
            imgid2anno[idx] = []
-            for i in range(len(line[1:]) // 8):
-                seg = [int(x) for x in line[(1 + i * 8):(1 + (i + 1) * 8)]]
-                points_x = seg[0:2:8]
-                points_y = seg[1:2:9]
-                box = [
-                    min(points_x),
-                    min(points_y),
-                    max(points_x),
-                    max(points_y)
-                ]
-                new_anno = {}
-                new_anno['iscrowd'] = 0
-                new_anno['category_id'] = 1
-                new_anno['bbox'] = box
-                new_anno['segmentation'] = [seg]
-                imgid2anno[idx].append(new_anno)
+            char_annos = []
+            with open(ann_file, 'r') as fr:
+                t = 0
+                for line in fr:
+                    line = line.strip()
+                    if t == 0:
+                        img_info['text'] = line
+                    else:
+                        char_box = [float(x) for x in line.split()]
+                        char_text = img_info['text'][t - 1]
+                        char_ann = dict(char_box=char_box, char_text=char_text)
+                        char_annos.append(char_ann)
+                    t += 1
+            imgid2anno[idx] = char_annos
            idx += 1

    return imgid2imgname, imgid2anno


-def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
-    # import pdb; pdb.set_trace()
-    with codecs.open(out_path, 'w', 'utf-8') as fw:
+def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False):
+    with open(out_path, 'w', encoding='utf-8') as fw:
        for key, value in imgid2imgname.items():
            if key in imgid2anno:
                anno = imgid2anno[key]
                line_dict = {}
                line_dict['file_name'] = value['file_name']
-                line_dict['height'] = value['height']
-                line_dict['width'] = value['width']
+                line_dict['text'] = value['text']
+                if img_size:
+                    line_dict['height'] = value['height']
+                    line_dict['width'] = value['width']
                line_dict['annotations'] = anno
                line_dict_str = json.dumps(line_dict)
                fw.write(line_dict_str + '\n')
@ -69,8 +67,7 @@ def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
 def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
-        '--img-prefix',
-        help='image prefix, to generate full image path with "image_name"')
+        '--data-root', help='data root for both image file and anno file')
    parser.add_argument(
        '--in-path',
        help='mapping file of image_name and ann_file,'
@ -84,7 +81,7 @@ def parse_args():

 def main():
    args = parse_args()
-    imgid2imgname, imgid2anno = parse_old_label(args.img_prefix, args.in_path)
+    imgid2imgname, imgid2anno = parse_old_label(args.data_root, args.in_path)
    gen_line_dict_file(args.out_path, imgid2imgname, imgid2anno)
    print('finish')