update recovery (#8358)

pull/8388/head
user1018 2022-11-18 10:14:26 +08:00 committed by GitHub
parent 44e60e81cc
commit 7b61b8f3d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 5 additions and 6 deletions

View File

@@ -229,7 +229,9 @@ def main(args):
if args.recovery and args.use_pdf2docx_api and flag_pdf:
from pdf2docx.converter import Converter
docx_file = os.path.join(args.output, '{}.docx'.format(img_name))
os.makedirs(args.output, exist_ok=True)
docx_file = os.path.join(args.output,
'{}_api.docx'.format(img_name))
cv = Converter(image_file)
cv.convert(docx_file)
cv.close()

View File

@@ -73,7 +73,7 @@ def convert_info_docx(img, res, save_folder, img_name):
text_run.font.size = shared.Pt(10)
# save to docx
docx_path = os.path.join(save_folder, '{}.docx'.format(img_name))
docx_path = os.path.join(save_folder, '{}_ocr.docx'.format(img_name))
doc.save(docx_path)
logger.info('docx save to {}'.format(docx_path))

View File

@@ -1,5 +1,4 @@
python-docx
PyMuPDF==1.19.0
beautifulsoup4
fonttools>=4.24.0
fire>=0.3.0

View File

@@ -278,8 +278,6 @@ class HtmlToDocx(HTMLParser):
cell_col += colspan
cell_row += 1
doc.save('1.docx')
def handle_data(self, data):
if self.skip:
return

View File

@@ -16,4 +16,4 @@ openpyxl
attrdict
Polygon3
lanms-neo==1.0.2
PyMuPDF==1.19.0
PyMuPDF<1.21.0