add encoding for open (#10769)

2023-08-29 13:51:46 +08:00 · 2023-08-29 13:51:46 +08:00 · c1b943fb25
parent 1b412a9944
commit c1b943fb25
5 changed files with 12 additions and 12 deletions
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@ -118,7 +118,7 @@ class BaseRecLabelEncode(object):
            self.lower = True
        else:
            self.character_str = []
-            with open(character_dict_path, "rb") as fin:
+            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n").strip("\r\n")
@ -278,7 +278,7 @@ class KieLabelEncode(object):
                char = line.strip()
                self.dict[char] = idx
                idx += 1
-        with open(class_path, "r") as fin:
+        with open(class_path, "r", encoding="utf-8") as fin:
            lines = fin.readlines()
            for idx, line in enumerate(lines):
                line = line.strip("\n")
@ -640,7 +640,7 @@ class TableLabelEncode(AttnLabelEncode):
        self.replace_empty_cell_token = replace_empty_cell_token
        dict_character = []
-        with open(character_dict_path, "rb") as fin:
+        with open(character_dict_path, "rb") as fin:
            lines = fin.readlines()
            for line in lines:
                line = line.decode('utf-8').strip("\n").strip("\r\n")
@ -1380,7 +1380,7 @@ class SRLabelEncode(BaseRecLabelEncode):
        super(SRLabelEncode, self).__init__(max_text_length,
                                            character_dict_path, use_space_char)
        self.dic = {}
-        with open(character_dict_path, 'r') as fin:
+        with open(character_dict_path, 'r', encoding="utf-8") as fin:
            for line in fin.readlines():
                line = line.strip()
                character, sequence = line.split()
--- a/ppocr/data/pubtab_dataset.py
+++ b/ppocr/data/pubtab_dataset.py
@ -59,7 +59,7 @@ class PubTabDataSet(Dataset):
            file_list = [file_list]
        data_lines = []
        for idx, file in enumerate(file_list):
-            with open(file, "rb") as f:
+            with open(file, "rb") as f:
                lines = f.readlines()
                if self.mode == "train" or ratio_list[idx] < 1.0:
                    random.seed(self.seed)
@ -112,7 +112,7 @@ class PubTabDataSet(Dataset):
                'file_name': file_name
            }
-            with open(data['img_path'], 'rb') as f:
+            with open(data['img_path'], 'rb') as f:
                img = f.read()
                data['image'] = img
            outs = transform(data, self.ops)
--- a/ppocr/data/simple_dataset.py
+++ b/ppocr/data/simple_dataset.py
@ -74,7 +74,7 @@ class SimpleDataSet(Dataset):
            file_list = [file_list]
        data_lines = []
        for idx, file in enumerate(file_list):
-            with open(file, "rb") as f:
+            with open(file, "rb") as f:
                lines = f.readlines()
                if self.mode == "train" or ratio_list[idx] < 1.0:
                    random.seed(self.seed)
@ -120,7 +120,7 @@ class SimpleDataSet(Dataset):
            data = {'img_path': img_path, 'label': label}
            if not os.path.exists(img_path):
                continue
-            with open(data['img_path'], 'rb') as f:
+            with open(data['img_path'], 'rb') as f:
                img = f.read()
                data['image'] = img
            data = transform(data, load_data_ops)
@ -146,7 +146,7 @@ class SimpleDataSet(Dataset):
            data = {'img_path': img_path, 'label': label}
            if not os.path.exists(img_path):
                raise Exception("{} does not exist!".format(img_path))
-            with open(data['img_path'], 'rb') as f:
+            with open(data['img_path'], 'rb') as f:
                img = f.read()
                data['image'] = img
            data['ext_data'] = self.get_ext_data()
@ -240,7 +240,7 @@ class MultiScaleDataSet(SimpleDataSet):
            data = {'img_path': img_path, 'label': label}
            if not os.path.exists(img_path):
                raise Exception("{} does not exist!".format(img_path))
-            with open(data['img_path'], 'rb') as f:
+            with open(data['img_path'], 'rb') as f:
                img = f.read()
                data['image'] = img
            data['ext_data'] = self.get_ext_data()
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@ -31,7 +31,7 @@ class BaseRecLabelDecode(object):
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
        else:
-            with open(character_dict_path, "rb") as fin:
+            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n").strip("\r\n")
--- a/ppocr/postprocess/table_postprocess.py
+++ b/ppocr/postprocess/table_postprocess.py
@ -26,7 +26,7 @@ class TableLabelDecode(AttnLabelDecode):
                 merge_no_span_structure=False,
                 **kwargs):
        dict_character = []
-        with open(character_dict_path, "rb") as fin:
+        with open(character_dict_path, "rb") as fin:
            lines = fin.readlines()
            for line in lines:
                line = line.decode('utf-8').strip("\n").strip("\r\n")