commit 1b3cf0da43

@@ -128,12 +128,16 @@ def evaluate(args,
         "f1": f1_score(out_label_list, preds_list),
     }
 
-    with open(os.path.join(args.output_dir, "test_gt.txt"), "w") as fout:
+    with open(
+            os.path.join(args.output_dir, "test_gt.txt"), "w",
+            encoding='utf-8') as fout:
         for lbl in out_label_list:
             for l in lbl:
                 fout.write(l + "\t")
             fout.write("\n")
-    with open(os.path.join(args.output_dir, "test_pred.txt"), "w") as fout:
+    with open(
+            os.path.join(args.output_dir, "test_pred.txt"), "w",
+            encoding='utf-8') as fout:
         for lbl in preds_list:
             for l in lbl:
                 fout.write(l + "\t")
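Note: every hunk in this commit applies the same one-line fix. In Python 3, open() without an encoding argument falls back to locale.getpreferredencoding(False), which on many Windows machines is cp936 or cp1252 rather than UTF-8, so reading or writing the Chinese labels in the XFUN data can raise UnicodeDecodeError/UnicodeEncodeError, or silently produce non-UTF-8 files. A minimal sketch of the failure mode, with an illustrative file name:

    import locale

    # Often 'cp936' or 'cp1252' on Windows, 'UTF-8' on most Linux/macOS setups.
    print(locale.getpreferredencoding(False))

    # with open("test_gt.txt", "w") as fout:   # may raise UnicodeEncodeError
    #     fout.write("其他\t")                  # if the locale default is cp1252
    with open("test_gt.txt", "w", encoding="utf-8") as fout:  # portable
        fout.write("其他\t")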
@@ -37,7 +37,7 @@ def parse_ser_results_fp(fp, fp_type="gt", ignore_background=True):
     assert fp_type in ["gt", "pred"]
     key = "label" if fp_type == "gt" else "pred"
     res_dict = dict()
-    with open(fp, "r") as fin:
+    with open(fp, "r", encoding='utf-8') as fin:
         lines = fin.readlines()
 
     for _, line in enumerate(lines):
@@ -16,13 +16,13 @@ import json
 
 
 def transfer_xfun_data(json_path=None, output_file=None):
-    with open(json_path, "r") as fin:
+    with open(json_path, "r", encoding='utf-8') as fin:
         lines = fin.readlines()
 
     json_info = json.loads(lines[0])
     documents = json_info["documents"]
     label_info = {}
-    with open(output_file, "w") as fout:
+    with open(output_file, "w", encoding='utf-8') as fout:
         for idx, document in enumerate(documents):
             img_info = document["img"]
             document = document["document"]
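Note: a hedged sketch of the read/write pattern in transfer_xfun_data above, where the whole annotation set sits in one JSON object on the first line of the file. The function below and its output format are illustrative only, not the repo's exact implementation:

    import json

    def transfer(json_path, output_file):
        with open(json_path, "r", encoding="utf-8") as fin:
            json_info = json.loads(fin.readline())  # dataset is one JSON line
        with open(output_file, "w", encoding="utf-8") as fout:
            for document in json_info["documents"]:
                # ensure_ascii=False keeps CJK text readable in the output file
                fout.write(json.dumps(document, ensure_ascii=False) + "\n")

Using utf-8 on both handles keeps the Chinese text byte-identical across the read/write round trip regardless of the host locale.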
@@ -92,7 +92,7 @@ def infer(args):
 def load_ocr(img_folder, json_path):
     import json
     d = []
-    with open(json_path, "r") as fin:
+    with open(json_path, "r", encoding='utf-8') as fin:
         lines = fin.readlines()
     for line in lines:
         image_name, info_str = line.split("\t")
@@ -59,7 +59,8 @@ def pad_sentences(tokenizer,
             encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0]
                                                                ] * difference
         else:
-            assert False, f"padding_side of tokenizer just supports [\"right\"] but got {tokenizer.padding_side}"
+            assert False, "padding_side of tokenizer just supports [\"right\"] but got {}".format(
+                tokenizer.padding_side)
     else:
         if return_attention_mask:
             encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
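Note: this hunk rewrites the assertion message with str.format(), presumably to keep the file importable on interpreters without f-string support (f-strings require Python >= 3.6). A small sketch of the equivalent construction, where padding_side is a stand-in for tokenizer.padding_side:

    padding_side = "left"  # hypothetical value for illustration
    msg = 'padding_side of tokenizer just supports ["right"] but got {}'.format(
        padding_side)
    try:
        assert padding_side == "right", msg
    except AssertionError as e:
        print(e)  # padding_side of tokenizer just supports ["right"] but got left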
@@ -224,7 +225,7 @@ def infer(args):
 
     # load ocr results json
     ocr_results = dict()
-    with open(args.ocr_json_path, "r") as fin:
+    with open(args.ocr_json_path, "r", encoding='utf-8') as fin:
         lines = fin.readlines()
         for line in lines:
             img_name, json_info = line.split("\t")
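Note: a hypothetical sketch of the tab-separated "name\tjson" format consumed above; the sample line is invented for illustration:

    import json

    line = 'zh_val_0.jpg\t{"ocr_info": [{"text": "汇款单", "bbox": [10, 10, 90, 30]}]}'
    img_name, json_info = line.split("\t")
    ocr_results = {img_name: json.loads(json_info)}
    print(ocr_results[img_name]["ocr_info"][0]["text"])  # 汇款单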
@@ -234,7 +235,10 @@ def infer(args):
     infer_imgs = get_image_file_list(args.infer_imgs)
 
     # loop for infer
-    with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout:
+    with open(
+            os.path.join(args.output_dir, "infer_results.txt"),
+            "w",
+            encoding='utf-8') as fout:
         for idx, img_path in enumerate(infer_imgs):
             print("process: [{}/{}]".format(idx, len(infer_imgs), img_path))
 
@@ -113,7 +113,10 @@ if __name__ == "__main__":
 
     # loop for infer
     ser_engine = SerPredictor(args)
-    with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout:
+    with open(
+            os.path.join(args.output_dir, "infer_results.txt"),
+            "w",
+            encoding='utf-8') as fout:
         for idx, img_path in enumerate(infer_imgs):
             print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path))
 
@@ -112,7 +112,10 @@ if __name__ == "__main__":
 
     # loop for infer
     ser_re_engine = SerReSystem(args)
-    with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout:
+    with open(
+            os.path.join(args.output_dir, "infer_results.txt"),
+            "w",
+            encoding='utf-8') as fout:
         for idx, img_path in enumerate(infer_imgs):
             print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path))
 
@@ -32,7 +32,7 @@ def set_seed(seed):
 
 
 def get_bio_label_maps(label_map_path):
-    with open(label_map_path, "r") as fin:
+    with open(label_map_path, "r", encoding='utf-8') as fin:
         lines = fin.readlines()
     lines = [line.strip() for line in lines]
     if "O" not in lines:
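Note: a hedged sketch of the kind of BIO mapping get_bio_label_maps builds from these lines; the label file content and mapping scheme below are illustrative, not the repo's exact output:

    lines = ["QUESTION", "ANSWER"]  # hypothetical label file content; "O" is added separately
    label2id = {"O": 0}
    for label in lines:
        label2id["B-" + label.upper()] = len(label2id)
        label2id["I-" + label.upper()] = len(label2id)
    id2label = {v: k for k, v in label2id.items()}
    print(label2id)  # {'O': 0, 'B-QUESTION': 1, 'I-QUESTION': 2, 'B-ANSWER': 3, 'I-ANSWER': 4}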
@@ -162,7 +162,7 @@ class XFUNDataset(Dataset):
         return encoded_inputs
 
     def read_all_lines(self, ):
-        with open(self.label_path, "r") as fin:
+        with open(self.label_path, "r", encoding='utf-8') as fin:
             lines = fin.readlines()
             return lines
 