mirror of
https://github.com/PaddlePaddle/PaddleOCR.git
synced 2025-06-03 21:53:39 +08:00
Merge pull request #3289 from WenmuZhou/fx_pse
Fix a bug where the training log was not saved, and add a file path check to the dataset
This commit is contained in:
commit
07026825ea
@ -46,6 +46,7 @@ class SimpleDataSet(Dataset):
|
|||||||
self.seed = seed
|
self.seed = seed
|
||||||
logger.info("Initialize indexs of datasets:%s" % label_file_list)
|
logger.info("Initialize indexs of datasets:%s" % label_file_list)
|
||||||
self.data_lines = self.get_image_info_list(label_file_list, ratio_list)
|
self.data_lines = self.get_image_info_list(label_file_list, ratio_list)
|
||||||
|
self.check_data()
|
||||||
self.data_idx_order_list = list(range(len(self.data_lines)))
|
self.data_idx_order_list = list(range(len(self.data_lines)))
|
||||||
if self.mode == "train" and self.do_shuffle:
|
if self.mode == "train" and self.do_shuffle:
|
||||||
self.shuffle_data_random()
|
self.shuffle_data_random()
|
||||||
@ -102,16 +103,8 @@ class SimpleDataSet(Dataset):
|
|||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
file_idx = self.data_idx_order_list[idx]
|
file_idx = self.data_idx_order_list[idx]
|
||||||
data_line = self.data_lines[file_idx]
|
data = self.data_lines[file_idx]
|
||||||
try:
|
try:
|
||||||
data_line = data_line.decode('utf-8')
|
|
||||||
substr = data_line.strip("\n").strip("\r").split(self.delimiter)
|
|
||||||
file_name = substr[0]
|
|
||||||
label = substr[1]
|
|
||||||
img_path = os.path.join(self.data_dir, file_name)
|
|
||||||
data = {'img_path': img_path, 'label': label}
|
|
||||||
if not os.path.exists(img_path):
|
|
||||||
raise Exception("{} does not exist!".format(img_path))
|
|
||||||
with open(data['img_path'], 'rb') as f:
|
with open(data['img_path'], 'rb') as f:
|
||||||
img = f.read()
|
img = f.read()
|
||||||
data['image'] = img
|
data['image'] = img
|
||||||
@ -120,8 +113,8 @@ class SimpleDataSet(Dataset):
|
|||||||
except:
|
except:
|
||||||
error_meg = traceback.format_exc()
|
error_meg = traceback.format_exc()
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
"When parsing line {}, error happened with msg: {}".format(
|
"When parsing file {} and label {}, error happened with msg: {}".format(
|
||||||
data_line, error_meg))
|
data['img_path'],data['label'], error_meg))
|
||||||
outs = None
|
outs = None
|
||||||
if outs is None:
|
if outs is None:
|
||||||
# during evaluation, we should fix the idx to get same results for many times of evaluation.
|
# during evaluation, we should fix the idx to get same results for many times of evaluation.
|
||||||
@ -132,3 +125,17 @@ class SimpleDataSet(Dataset):
|
|||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.data_idx_order_list)
|
return len(self.data_idx_order_list)
|
||||||
|
|
||||||
|
def check_data(self):
|
||||||
|
new_data_lines = []
|
||||||
|
for data_line in self.data_lines:
|
||||||
|
data_line = data_line.decode('utf-8')
|
||||||
|
substr = data_line.strip("\n").strip("\r").split(self.delimiter)
|
||||||
|
file_name = substr[0]
|
||||||
|
label = substr[1]
|
||||||
|
img_path = os.path.join(self.data_dir, file_name)
|
||||||
|
if os.path.exists(img_path):
|
||||||
|
new_data_lines.append({'img_path': img_path, 'label': label})
|
||||||
|
else:
|
||||||
|
self.logger.info("{} does not exist!".format(img_path))
|
||||||
|
self.data_lines = new_data_lines
|
@ -24,9 +24,6 @@ from paddle import inference
|
|||||||
import time
|
import time
|
||||||
from ppocr.utils.logging import get_logger
|
from ppocr.utils.logging import get_logger
|
||||||
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
|
|
||||||
def str2bool(v):
|
def str2bool(v):
|
||||||
return v.lower() in ("true", "t", "1")
|
return v.lower() in ("true", "t", "1")
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user