diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index bb061c998..b32b70629 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -229,7 +229,9 @@ def main(args): if args.recovery and args.use_pdf2docx_api and flag_pdf: from pdf2docx.converter import Converter - docx_file = os.path.join(args.output, '{}.docx'.format(img_name)) + os.makedirs(args.output, exist_ok=True) + docx_file = os.path.join(args.output, + '{}_api.docx'.format(img_name)) cv = Converter(image_file) cv.convert(docx_file) cv.close() diff --git a/ppstructure/recovery/recovery_to_doc.py b/ppstructure/recovery/recovery_to_doc.py index 1d8f8d9d4..050181208 100644 --- a/ppstructure/recovery/recovery_to_doc.py +++ b/ppstructure/recovery/recovery_to_doc.py @@ -73,7 +73,7 @@ def convert_info_docx(img, res, save_folder, img_name): text_run.font.size = shared.Pt(10) # save to docx - docx_path = os.path.join(save_folder, '{}.docx'.format(img_name)) + docx_path = os.path.join(save_folder, '{}_ocr.docx'.format(img_name)) doc.save(docx_path) logger.info('docx save to {}'.format(docx_path)) diff --git a/ppstructure/recovery/requirements.txt b/ppstructure/recovery/requirements.txt index ec08f9d0a..761b9d7c3 100644 --- a/ppstructure/recovery/requirements.txt +++ b/ppstructure/recovery/requirements.txt @@ -1,5 +1,4 @@ python-docx -PyMuPDF==1.19.0 beautifulsoup4 fonttools>=4.24.0 fire>=0.3.0 diff --git a/ppstructure/recovery/table_process.py b/ppstructure/recovery/table_process.py index 982e6b760..77a6ef765 100644 --- a/ppstructure/recovery/table_process.py +++ b/ppstructure/recovery/table_process.py @@ -278,8 +278,6 @@ class HtmlToDocx(HTMLParser): cell_col += colspan cell_row += 1 - doc.save('1.docx') - def handle_data(self, data): if self.skip: return diff --git a/requirements.txt b/requirements.txt index 8c5b12f83..b6dd6e57b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,4 @@ openpyxl attrdict Polygon3 lanms-neo==1.0.2 -PyMuPDF==1.19.0 \ No newline at end of file +PyMuPDF<1.21.0 \ No newline at end of file