Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into fixocr

2025-06-03 21:53:39 +08:00 · 2020-05-25 20:28:36 +08:00 · 2020-05-25 20:28:36 +08:00 · afecc4973a
commit afecc4973a
parent 75d2c47d8a c63624b3fd
5 changed files with 20 additions and 7 deletions
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 import os
 import sys
 import math
 import random
 import functools
@@ -42,6 +43,10 @@ class TrainReader(object):
            img_num = len(label_infor_list)
            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
            if sys.platform == "win32":
                print("multiprocess is not fully compatible with Windows."
                      "num_workers will be 1.")
                self.num_workers = 1
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
                outs = self.process(label_infor)
--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
    reader_function = params['reader_function']
    function = create_module(reader_function)(params)
    if mode == "train":
        if sys.platform == "win32":
            return function(0)
        readers = []
        num_workers = params['num_workers']
        for process_id in range(num_workers):
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 import os
 import sys
 import math
 import random
 import numpy as np
@@ -191,16 +192,21 @@ class SimpleReader(object):
                img_num = len(label_infor_list)
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
                if sys.platform=="win32":
                    print("multiprocess is not fully compatible with Windows."
                          "num_workers will be 1.")
                    self.num_workers = 1
                for img_id in range(process_id, img_num, self.num_workers):
                    label_infor = label_infor_list[img_id_list[img_id]]
                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
                    img_path = self.img_set_dir + "/" + substr[0]
                    img = cv2.imread(img_path)
                    if img.shape[-1]==1 or len(list(img.shape))==2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    if img is None:
                        logger.info("{} does not exist!".format(img_path))
                        continue
                    if img.shape[-1]==1 or len(list(img.shape))==2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    label = substr[1]
                    outs = process_image(img, self.image_shape, label,
                                         self.char_ops, self.loss_type,
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@@ -34,7 +34,7 @@ class CharacterOps(object):
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
-                    line = line.decode('utf-8').strip("\n")
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
                    self.character_str += line
            dict_character = list(self.character_str)
        elif self.character_type == "en_sensitive":
--- a/tools/eval_utils/eval_rec_utils.py
+++ b/tools/eval_utils/eval_rec_utils.py
@@ -48,7 +48,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
    total_sample_num = 0
    total_acc_num = 0
    total_batch_num = 0
-    if mode == "eval":
+    if mode == "test":
        is_remove_duplicate = False
    else:
        is_remove_duplicate = True
@@ -91,11 +91,11 @@ def test_rec_benchmark(exe, config, eval_info_dict):
    total_correct_number = 0
    eval_data_acc_info = {}
    for eval_data in eval_data_list:
-        config['TestReader']['lmdb_sets_dir'] = \
+        config['EvalReader']['lmdb_sets_dir'] = \
            eval_data_dir + "/" + eval_data
-        eval_reader = reader_main(config=config, mode="test")
+        eval_reader = reader_main(config=config, mode="eval")
        eval_info_dict['reader'] = eval_reader
-        metrics = eval_rec_run(exe, config, eval_info_dict, "test")
+        metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
        total_evaluation_data_number += metrics['total_sample_num']
        total_correct_number += metrics['total_acc_num']
        eval_data_acc_info[eval_data] = metrics