From 941869e6e880a8e70923797c9932b1217aad1b64 Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Thu, 31 Aug 2023 15:43:17 +0800 Subject: [PATCH] Revert "add encoding for open (#10769)" (#10789) This reverts commit c1b943fb25ff47d0f868e1696a796e68de2e880c. --- ppocr/data/imaug/label_ops.py | 8 ++++---- ppocr/data/pubtab_dataset.py | 4 ++-- ppocr/data/simple_dataset.py | 8 ++++---- ppocr/postprocess/rec_postprocess.py | 2 +- ppocr/postprocess/table_postprocess.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 48f12b96a..148b09368 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -118,7 +118,7 @@ class BaseRecLabelEncode(object): self.lower = True else: self.character_str = [] - with open(character_dict_path, "rb", encoding="utf-8") as fin: + with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: line = line.decode('utf-8').strip("\n").strip("\r\n") @@ -278,7 +278,7 @@ class KieLabelEncode(object): char = line.strip() self.dict[char] = idx idx += 1 - with open(class_path, "r", encoding="utf-8") as fin: + with open(class_path, "r") as fin: lines = fin.readlines() for idx, line in enumerate(lines): line = line.strip("\n") @@ -640,7 +640,7 @@ class TableLabelEncode(AttnLabelEncode): self.replace_empty_cell_token = replace_empty_cell_token dict_character = [] - with open(character_dict_path, "rb", encoding="utf-8") as fin: + with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: line = line.decode('utf-8').strip("\n").strip("\r\n") @@ -1380,7 +1380,7 @@ class SRLabelEncode(BaseRecLabelEncode): super(SRLabelEncode, self).__init__(max_text_length, character_dict_path, use_space_char) self.dic = {} - with open(character_dict_path, 'r', encoding="utf-8") as fin: + with open(character_dict_path, 'r') as fin: for line in fin.readlines(): line = line.strip() character, sequence = line.split() diff --git a/ppocr/data/pubtab_dataset.py b/ppocr/data/pubtab_dataset.py index c84a7af90..642d3eb19 100644 --- a/ppocr/data/pubtab_dataset.py +++ b/ppocr/data/pubtab_dataset.py @@ -59,7 +59,7 @@ class PubTabDataSet(Dataset): file_list = [file_list] data_lines = [] for idx, file in enumerate(file_list): - with open(file, "rb", encoding="utf-8") as f: + with open(file, "rb") as f: lines = f.readlines() if self.mode == "train" or ratio_list[idx] < 1.0: random.seed(self.seed) @@ -112,7 +112,7 @@ class PubTabDataSet(Dataset): 'file_name': file_name } - with open(data['img_path'], 'rb', encoding="utf-8") as f: + with open(data['img_path'], 'rb') as f: img = f.read() data['image'] = img outs = transform(data, self.ops) diff --git a/ppocr/data/simple_dataset.py b/ppocr/data/simple_dataset.py index d17f931fa..044eafe10 100644 --- a/ppocr/data/simple_dataset.py +++ b/ppocr/data/simple_dataset.py @@ -74,7 +74,7 @@ class SimpleDataSet(Dataset): file_list = [file_list] data_lines = [] for idx, file in enumerate(file_list): - with open(file, "rb", encoding="utf-8") as f: + with open(file, "rb") as f: lines = f.readlines() if self.mode == "train" or ratio_list[idx] < 1.0: random.seed(self.seed) @@ -120,7 +120,7 @@ class SimpleDataSet(Dataset): data = {'img_path': img_path, 'label': label} if not os.path.exists(img_path): continue - with open(data['img_path'], 'rb', encoding="utf-8") as f: + with open(data['img_path'], 'rb') as f: img = f.read() data['image'] = img data = transform(data, load_data_ops) @@ -146,7 +146,7 @@ class SimpleDataSet(Dataset): data = {'img_path': img_path, 'label': label} if not os.path.exists(img_path): raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb', encoding="utf-8") as f: + with open(data['img_path'], 'rb') as f: img = f.read() data['image'] = img data['ext_data'] = self.get_ext_data() @@ -240,7 +240,7 @@ class MultiScaleDataSet(SimpleDataSet): data = {'img_path': img_path, 'label': label} if not os.path.exists(img_path): raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb', encoding="utf-8") as f: + with open(data['img_path'], 'rb') as f: img = f.read() data['image'] = img data['ext_data'] = self.get_ext_data() diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 3af3536e3..f64ea1ce7 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -31,7 +31,7 @@ class BaseRecLabelDecode(object): self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) else: - with open(character_dict_path, "rb", encoding="utf-8") as fin: + with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: line = line.decode('utf-8').strip("\n").strip("\r\n") diff --git a/ppocr/postprocess/table_postprocess.py b/ppocr/postprocess/table_postprocess.py index 05e89bb92..a47061f93 100644 --- a/ppocr/postprocess/table_postprocess.py +++ b/ppocr/postprocess/table_postprocess.py @@ -26,7 +26,7 @@ class TableLabelDecode(AttnLabelDecode): merge_no_span_structure=False, **kwargs): dict_character = [] - with open(character_dict_path, "rb", encoding="utf-8") as fin: + with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: line = line.decode('utf-8').strip("\n").strip("\r\n")