新增功能：当 --savefile 为 true 时，在 --output 目录下以“当前图片名称 + .txt”为文件名保存 OCR 推理结果，解决了 issue #10533 (#10628)

* 实现功能：当 --savefile 为 true 时，在 --output 目录下以“当前图片名称 + .txt”为文件名保存 OCR 推理结果，解决了 issue：https://github.com/PaddlePaddle/PaddleOCR/issues/10533
* 删除了乱码字符
* 1.删除了乱码字符
* 1.删除了乱码字符
2023-08-21 10:25:47 +08:00 · 2023-08-21 10:25:47 +08:00 · 2bd552c801
parent 21da7f4c00
commit 2bd552c801
1 changed files with 17 additions and 1 deletions
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -408,6 +408,7 @@ def parse_args(mMain=True):
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')
+    parser.add_argument("--savefile", type=str2bool, default=False)
    parser.add_argument(
        "--ocr_version",
        type=str,
@@ -619,7 +620,7 @@ class PaddleOCR(predict_system.TextSystem):
    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
-        args：
+        args:
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not. If false, only rec will be exec. Default is True
            rec: use text recognition or not. If false, only det will be exec. Default is True
@@ -768,10 +769,25 @@ def main():
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
+                lines = []
                for idx in range(len(result)):
                    res = result[idx]
                    for line in res:
                        logger.info(line)
+                        val = '['
+                        for box in line[0]:
+                            val += str(box[0]) + ',' + str(box[1]) + ','
+
+                        val = val[:-1]
+                        val += '],' + line[1][0] + ',' + str(line[1][1]) + '\n'
+                        lines.append(val)
+                if args.savefile:
+                    if os.path.exists(args.output) is False:
+                        os.mkdir(args.output)
+                    outfile = args.output + '/' + img_name + '.txt'
+                    with open(outfile,'w',encoding='utf-8') as f:
+                        f.writelines(lines)
+                     
        elif args.type == 'structure':
            img, flag_gif, flag_pdf = check_and_read(img_path)
            if not flag_gif and not flag_pdf: