add encoding for open (#10769)
parent
1b412a9944
commit
c1b943fb25
ppocr
|
@ -118,7 +118,7 @@ class BaseRecLabelEncode(object):
|
||||||
self.lower = True
|
self.lower = True
|
||||||
else:
|
else:
|
||||||
self.character_str = []
|
self.character_str = []
|
||||||
with open(character_dict_path, "rb") as fin:
|
with open(character_dict_path, "rb", encoding="utf-8") as fin:
|
||||||
lines = fin.readlines()
|
lines = fin.readlines()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
||||||
|
@ -278,7 +278,7 @@ class KieLabelEncode(object):
|
||||||
char = line.strip()
|
char = line.strip()
|
||||||
self.dict[char] = idx
|
self.dict[char] = idx
|
||||||
idx += 1
|
idx += 1
|
||||||
with open(class_path, "r") as fin:
|
with open(class_path, "r", encoding="utf-8") as fin:
|
||||||
lines = fin.readlines()
|
lines = fin.readlines()
|
||||||
for idx, line in enumerate(lines):
|
for idx, line in enumerate(lines):
|
||||||
line = line.strip("\n")
|
line = line.strip("\n")
|
||||||
|
@ -640,7 +640,7 @@ class TableLabelEncode(AttnLabelEncode):
|
||||||
self.replace_empty_cell_token = replace_empty_cell_token
|
self.replace_empty_cell_token = replace_empty_cell_token
|
||||||
|
|
||||||
dict_character = []
|
dict_character = []
|
||||||
with open(character_dict_path, "rb") as fin:
|
with open(character_dict_path, "rb", encoding="utf-8") as fin:
|
||||||
lines = fin.readlines()
|
lines = fin.readlines()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
||||||
|
@ -1380,7 +1380,7 @@ class SRLabelEncode(BaseRecLabelEncode):
|
||||||
super(SRLabelEncode, self).__init__(max_text_length,
|
super(SRLabelEncode, self).__init__(max_text_length,
|
||||||
character_dict_path, use_space_char)
|
character_dict_path, use_space_char)
|
||||||
self.dic = {}
|
self.dic = {}
|
||||||
with open(character_dict_path, 'r') as fin:
|
with open(character_dict_path, 'r', encoding="utf-8") as fin:
|
||||||
for line in fin.readlines():
|
for line in fin.readlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
character, sequence = line.split()
|
character, sequence = line.split()
|
||||||
|
|
|
@ -59,7 +59,7 @@ class PubTabDataSet(Dataset):
|
||||||
file_list = [file_list]
|
file_list = [file_list]
|
||||||
data_lines = []
|
data_lines = []
|
||||||
for idx, file in enumerate(file_list):
|
for idx, file in enumerate(file_list):
|
||||||
with open(file, "rb") as f:
|
with open(file, "rb", encoding="utf-8") as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
if self.mode == "train" or ratio_list[idx] < 1.0:
|
if self.mode == "train" or ratio_list[idx] < 1.0:
|
||||||
random.seed(self.seed)
|
random.seed(self.seed)
|
||||||
|
@ -112,7 +112,7 @@ class PubTabDataSet(Dataset):
|
||||||
'file_name': file_name
|
'file_name': file_name
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(data['img_path'], 'rb') as f:
|
with open(data['img_path'], 'rb', encoding="utf-8") as f:
|
||||||
img = f.read()
|
img = f.read()
|
||||||
data['image'] = img
|
data['image'] = img
|
||||||
outs = transform(data, self.ops)
|
outs = transform(data, self.ops)
|
||||||
|
|
|
@ -74,7 +74,7 @@ class SimpleDataSet(Dataset):
|
||||||
file_list = [file_list]
|
file_list = [file_list]
|
||||||
data_lines = []
|
data_lines = []
|
||||||
for idx, file in enumerate(file_list):
|
for idx, file in enumerate(file_list):
|
||||||
with open(file, "rb") as f:
|
with open(file, "rb", encoding="utf-8") as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
if self.mode == "train" or ratio_list[idx] < 1.0:
|
if self.mode == "train" or ratio_list[idx] < 1.0:
|
||||||
random.seed(self.seed)
|
random.seed(self.seed)
|
||||||
|
@ -120,7 +120,7 @@ class SimpleDataSet(Dataset):
|
||||||
data = {'img_path': img_path, 'label': label}
|
data = {'img_path': img_path, 'label': label}
|
||||||
if not os.path.exists(img_path):
|
if not os.path.exists(img_path):
|
||||||
continue
|
continue
|
||||||
with open(data['img_path'], 'rb') as f:
|
with open(data['img_path'], 'rb', encoding="utf-8") as f:
|
||||||
img = f.read()
|
img = f.read()
|
||||||
data['image'] = img
|
data['image'] = img
|
||||||
data = transform(data, load_data_ops)
|
data = transform(data, load_data_ops)
|
||||||
|
@ -146,7 +146,7 @@ class SimpleDataSet(Dataset):
|
||||||
data = {'img_path': img_path, 'label': label}
|
data = {'img_path': img_path, 'label': label}
|
||||||
if not os.path.exists(img_path):
|
if not os.path.exists(img_path):
|
||||||
raise Exception("{} does not exist!".format(img_path))
|
raise Exception("{} does not exist!".format(img_path))
|
||||||
with open(data['img_path'], 'rb') as f:
|
with open(data['img_path'], 'rb', encoding="utf-8") as f:
|
||||||
img = f.read()
|
img = f.read()
|
||||||
data['image'] = img
|
data['image'] = img
|
||||||
data['ext_data'] = self.get_ext_data()
|
data['ext_data'] = self.get_ext_data()
|
||||||
|
@ -240,7 +240,7 @@ class MultiScaleDataSet(SimpleDataSet):
|
||||||
data = {'img_path': img_path, 'label': label}
|
data = {'img_path': img_path, 'label': label}
|
||||||
if not os.path.exists(img_path):
|
if not os.path.exists(img_path):
|
||||||
raise Exception("{} does not exist!".format(img_path))
|
raise Exception("{} does not exist!".format(img_path))
|
||||||
with open(data['img_path'], 'rb') as f:
|
with open(data['img_path'], 'rb', encoding="utf-8") as f:
|
||||||
img = f.read()
|
img = f.read()
|
||||||
data['image'] = img
|
data['image'] = img
|
||||||
data['ext_data'] = self.get_ext_data()
|
data['ext_data'] = self.get_ext_data()
|
||||||
|
|
|
@ -31,7 +31,7 @@ class BaseRecLabelDecode(object):
|
||||||
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||||
dict_character = list(self.character_str)
|
dict_character = list(self.character_str)
|
||||||
else:
|
else:
|
||||||
with open(character_dict_path, "rb") as fin:
|
with open(character_dict_path, "rb", encoding="utf-8") as fin:
|
||||||
lines = fin.readlines()
|
lines = fin.readlines()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
||||||
|
|
|
@ -26,7 +26,7 @@ class TableLabelDecode(AttnLabelDecode):
|
||||||
merge_no_span_structure=False,
|
merge_no_span_structure=False,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
dict_character = []
|
dict_character = []
|
||||||
with open(character_dict_path, "rb") as fin:
|
with open(character_dict_path, "rb", encoding="utf-8") as fin:
|
||||||
lines = fin.readlines()
|
lines = fin.readlines()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
line = line.decode('utf-8').strip("\n").strip("\r\n")
|
||||||
|
|
Loading…
Reference in New Issue