Mirror of https://github.com/PaddlePaddle/PaddleOCR.git (synced 2025-06-03 21:53:39 +08:00)
update recovery (#8358)
This commit is contained in:
parent: 44e60e81cc
commit: 7b61b8f3d0
@@ -229,7 +229,9 @@ def main(args):
 
 if args.recovery and args.use_pdf2docx_api and flag_pdf:
     from pdf2docx.converter import Converter
-    docx_file = os.path.join(args.output, '{}.docx'.format(img_name))
+    os.makedirs(args.output, exist_ok=True)
+    docx_file = os.path.join(args.output,
+                             '{}_api.docx'.format(img_name))
     cv = Converter(image_file)
     cv.convert(docx_file)
     cv.close()
@@ -73,7 +73,7 @@ def convert_info_docx(img, res, save_folder, img_name):
 text_run.font.size = shared.Pt(10)
 
 # save to docx
-docx_path = os.path.join(save_folder, '{}.docx'.format(img_name))
+docx_path = os.path.join(save_folder, '{}_ocr.docx'.format(img_name))
 doc.save(docx_path)
 logger.info('docx save to {}'.format(docx_path))
 
@@ -1,5 +1,4 @@
 python-docx
-PyMuPDF==1.19.0
 beautifulsoup4
 fonttools>=4.24.0
 fire>=0.3.0
@@ -278,8 +278,6 @@ class HtmlToDocx(HTMLParser):
 cell_col += colspan
 cell_row += 1
 
-doc.save('1.docx')
-
 def handle_data(self, data):
     if self.skip:
         return
@@ -16,4 +16,4 @@ openpyxl
 attrdict
 Polygon3
 lanms-neo==1.0.2
-PyMuPDF==1.19.0
+PyMuPDF<1.21.0
Loading…
x
Reference in New Issue
Block a user