update recovery (#8358)
parent
44e60e81cc
commit
7b61b8f3d0
|
@@ -229,7 +229,9 @@ def main(args):
|
|||
|
||||
if args.recovery and args.use_pdf2docx_api and flag_pdf:
|
||||
from pdf2docx.converter import Converter
|
||||
docx_file = os.path.join(args.output, '{}.docx'.format(img_name))
|
||||
os.makedirs(args.output, exist_ok=True)
|
||||
docx_file = os.path.join(args.output,
|
||||
'{}_api.docx'.format(img_name))
|
||||
cv = Converter(image_file)
|
||||
cv.convert(docx_file)
|
||||
cv.close()
|
||||
|
|
|
@@ -73,7 +73,7 @@ def convert_info_docx(img, res, save_folder, img_name):
|
|||
text_run.font.size = shared.Pt(10)
|
||||
|
||||
# save to docx
|
||||
docx_path = os.path.join(save_folder, '{}.docx'.format(img_name))
|
||||
docx_path = os.path.join(save_folder, '{}_ocr.docx'.format(img_name))
|
||||
doc.save(docx_path)
|
||||
logger.info('docx save to {}'.format(docx_path))
|
||||
|
||||
|
|
|
@@ -1,5 +1,4 @@
|
|||
python-docx
|
||||
PyMuPDF==1.19.0
|
||||
beautifulsoup4
|
||||
fonttools>=4.24.0
|
||||
fire>=0.3.0
|
||||
|
|
|
@@ -278,8 +278,6 @@ class HtmlToDocx(HTMLParser):
|
|||
cell_col += colspan
|
||||
cell_row += 1
|
||||
|
||||
doc.save('1.docx')
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.skip:
|
||||
return
|
||||
|
|
|
@@ -16,4 +16,4 @@ openpyxl
|
|||
attrdict
|
||||
Polygon3
|
||||
lanms-neo==1.0.2
|
||||
PyMuPDF==1.19.0
|
||||
PyMuPDF<1.21.0
|
Loading…
Reference in New Issue