mirror of
https://github.com/PaddlePaddle/PaddleOCR.git
synced 2025-06-03 21:53:39 +08:00
Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into fixocr
This commit is contained in:
commit
afecc4973a
5
ppocr/data/det/dataset_traversal.py
Executable file → Normal file
5
ppocr/data/det/dataset_traversal.py
Executable file → Normal file
@ -13,6 +13,7 @@
|
|||||||
#limitations under the License.
|
#limitations under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import functools
|
import functools
|
||||||
@ -42,6 +43,10 @@ class TrainReader(object):
|
|||||||
img_num = len(label_infor_list)
|
img_num = len(label_infor_list)
|
||||||
img_id_list = list(range(img_num))
|
img_id_list = list(range(img_num))
|
||||||
random.shuffle(img_id_list)
|
random.shuffle(img_id_list)
|
||||||
|
if sys.platform == "win32":
|
||||||
|
print("multiprocess is not fully compatible with Windows."
|
||||||
|
"num_workers will be 1.")
|
||||||
|
self.num_workers = 1
|
||||||
for img_id in range(process_id, img_num, self.num_workers):
|
for img_id in range(process_id, img_num, self.num_workers):
|
||||||
label_infor = label_infor_list[img_id_list[img_id]]
|
label_infor = label_infor_list[img_id_list[img_id]]
|
||||||
outs = self.process(label_infor)
|
outs = self.process(label_infor)
|
||||||
|
@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
|
|||||||
reader_function = params['reader_function']
|
reader_function = params['reader_function']
|
||||||
function = create_module(reader_function)(params)
|
function = create_module(reader_function)(params)
|
||||||
if mode == "train":
|
if mode == "train":
|
||||||
|
if sys.platform == "win32":
|
||||||
|
return function(0)
|
||||||
readers = []
|
readers = []
|
||||||
num_workers = params['num_workers']
|
num_workers = params['num_workers']
|
||||||
for process_id in range(num_workers):
|
for process_id in range(num_workers):
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#limitations under the License.
|
#limitations under the License.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -191,16 +192,21 @@ class SimpleReader(object):
|
|||||||
img_num = len(label_infor_list)
|
img_num = len(label_infor_list)
|
||||||
img_id_list = list(range(img_num))
|
img_id_list = list(range(img_num))
|
||||||
random.shuffle(img_id_list)
|
random.shuffle(img_id_list)
|
||||||
|
if sys.platform=="win32":
|
||||||
|
print("multiprocess is not fully compatible with Windows."
|
||||||
|
"num_workers will be 1.")
|
||||||
|
self.num_workers = 1
|
||||||
for img_id in range(process_id, img_num, self.num_workers):
|
for img_id in range(process_id, img_num, self.num_workers):
|
||||||
label_infor = label_infor_list[img_id_list[img_id]]
|
label_infor = label_infor_list[img_id_list[img_id]]
|
||||||
substr = label_infor.decode('utf-8').strip("\n").split("\t")
|
substr = label_infor.decode('utf-8').strip("\n").split("\t")
|
||||||
img_path = self.img_set_dir + "/" + substr[0]
|
img_path = self.img_set_dir + "/" + substr[0]
|
||||||
img = cv2.imread(img_path)
|
img = cv2.imread(img_path)
|
||||||
if img.shape[-1]==1 or len(list(img.shape))==2:
|
|
||||||
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
|
||||||
if img is None:
|
if img is None:
|
||||||
logger.info("{} does not exist!".format(img_path))
|
logger.info("{} does not exist!".format(img_path))
|
||||||
continue
|
continue
|
||||||
|
if img.shape[-1]==1 or len(list(img.shape))==2:
|
||||||
|
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||||
|
|
||||||
label = substr[1]
|
label = substr[1]
|
||||||
outs = process_image(img, self.image_shape, label,
|
outs = process_image(img, self.image_shape, label,
|
||||||
self.char_ops, self.loss_type,
|
self.char_ops, self.loss_type,
|
||||||
|
@ -34,7 +34,7 @@ class CharacterOps(object):
|
|||||||
with open(character_dict_path, "rb") as fin:
|
with open(character_dict_path, "rb") as fin:
|
||||||
lines = fin.readlines()
|
lines = fin.readlines()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.decode('utf-8').strip("\n")
|
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
||||||
self.character_str += line
|
self.character_str += line
|
||||||
dict_character = list(self.character_str)
|
dict_character = list(self.character_str)
|
||||||
elif self.character_type == "en_sensitive":
|
elif self.character_type == "en_sensitive":
|
||||||
|
@ -48,7 +48,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
|
|||||||
total_sample_num = 0
|
total_sample_num = 0
|
||||||
total_acc_num = 0
|
total_acc_num = 0
|
||||||
total_batch_num = 0
|
total_batch_num = 0
|
||||||
if mode == "eval":
|
if mode == "test":
|
||||||
is_remove_duplicate = False
|
is_remove_duplicate = False
|
||||||
else:
|
else:
|
||||||
is_remove_duplicate = True
|
is_remove_duplicate = True
|
||||||
@ -91,11 +91,11 @@ def test_rec_benchmark(exe, config, eval_info_dict):
|
|||||||
total_correct_number = 0
|
total_correct_number = 0
|
||||||
eval_data_acc_info = {}
|
eval_data_acc_info = {}
|
||||||
for eval_data in eval_data_list:
|
for eval_data in eval_data_list:
|
||||||
config['TestReader']['lmdb_sets_dir'] = \
|
config['EvalReader']['lmdb_sets_dir'] = \
|
||||||
eval_data_dir + "/" + eval_data
|
eval_data_dir + "/" + eval_data
|
||||||
eval_reader = reader_main(config=config, mode="test")
|
eval_reader = reader_main(config=config, mode="eval")
|
||||||
eval_info_dict['reader'] = eval_reader
|
eval_info_dict['reader'] = eval_reader
|
||||||
metrics = eval_rec_run(exe, config, eval_info_dict, "test")
|
metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
|
||||||
total_evaluation_data_number += metrics['total_sample_num']
|
total_evaluation_data_number += metrics['total_sample_num']
|
||||||
total_correct_number += metrics['total_acc_num']
|
total_correct_number += metrics['total_acc_num']
|
||||||
eval_data_acc_info[eval_data] = metrics
|
eval_data_acc_info[eval_data] = metrics
|
||||||
|
Loading…
x
Reference in New Issue
Block a user