442 lines
17 KiB
Python
442 lines
17 KiB
Python
import sys
|
|
import tarfile
|
|
import os
|
|
import time
|
|
import datetime
|
|
import functools
|
|
import cv2
|
|
import platform
|
|
import numpy as np
|
|
from qtpy.QtWidgets import QApplication, QWidget, QPushButton, QProgressBar, \
|
|
QGridLayout, QMessageBox, QLabel, QFileDialog
|
|
from qtpy.QtCore import Signal, QThread, QObject
|
|
from qtpy.QtGui import QImage, QPixmap, QIcon
|
|
|
|
# Absolute path of the directory that contains this script.
file = os.path.dirname(os.path.abspath(__file__))
# Directory two levels above this script (presumably the project root that
# holds the ppstructure / ppocr packages imported below -- TODO confirm).
root = os.path.abspath(os.path.join(file, os.pardir, os.pardir))

# `root` goes to the front of the search path, `file` to the back.
sys.path.insert(0, root)
sys.path.append(file)
|
|
|
|
from ppstructure.predict_system import StructureSystem, save_structure_res
|
|
from ppstructure.utility import parse_args, draw_structure_result
|
|
from ppocr.utils.network import download_with_progressbar
|
|
from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
|
|
# from ScreenShotWidget import ScreenShotWidget
|
|
|
|
# Application name and version shown in the window title.
__APPNAME__ = "pdf2word"
__VERSION__ = "0.1.1"

# Model archives used for English documents.
# Each key is the folder name the archive is unpacked into.
URLs_EN = {
    # Detection model of the lightweight English PP-OCRv3
    "en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar",
    # Recognition model of the lightweight English PP-OCRv3
    "en_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar",
    # Ultra-lightweight English table model
    "en_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
    # English layout analysis model
    "picodet_lcnet_x1_0_fgd_layout_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar",
}

# Dictionary file names that go with the English models.
DICT_EN = {
    "rec_char_dict_path": "en_dict.txt",
    "layout_dict_path": "layout_publaynet_dict.txt",
}

# Model archives used for Chinese documents.
# Each key is the folder name the archive is unpacked into.
URLs_CN = {
    # Detection model of the lightweight Chinese PP-OCRv3
    "cn_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar",
    # Recognition model of the lightweight Chinese PP-OCRv3
    "cn_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar",
    # Ultra-lightweight English table model (same archive as the English set)
    "cn_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
    # Chinese layout analysis model
    "picodet_lcnet_x1_0_fgd_layout_cdla_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar",
}

# Dictionary file names that go with the Chinese models.
DICT_CN = {
    "rec_char_dict_path": "ppocr_keys_v1.txt",
    "layout_dict_path": "layout_cdla_dict.txt",
}
|
|
|
|
|
|
|
|
def QImageToCvMat(incomingImage) -> np.ndarray:
    """Convert a QImage into a ``(height, width, 4)`` uint8 numpy array.

    The image is first converted to ``Format_RGBA8888``, so the four
    channels of the result are in R, G, B, A order.

    Args:
        incomingImage: the QImage to convert.

    Returns:
        A freshly allocated array that owns its data.
    """
    rgba = incomingImage.convertToFormat(QImage.Format.Format_RGBA8888)

    width = rgba.width()
    height = rgba.height()

    buf = rgba.bits()
    if buf is None:
        # No pixel data to read (e.g. a null image): hand back an empty
        # array of the reported size instead of failing on the buffer.
        return np.zeros((height, width, 4), dtype=np.uint8)

    # Some bindings return a pointer object whose size has to be declared
    # before it can be read as a buffer; others return an already sized
    # buffer that has no setsize().
    if hasattr(buf, "setsize"):
        buf.setsize(height * width * 4)

    pixels = np.frombuffer(buf, np.uint8).reshape((height, width, 4))
    # `rgba` is a local temporary; copy so the result does not point into
    # image memory that may be released once this function returns.
    return pixels.copy()
|
|
|
|
|
|
def readImage(image_file) -> list:
    """Load a PDF or an image file as a list of BGR images.

    A PDF yields one image per page. Each page is rendered at 2x zoom,
    falling back to 1x when the enlarged page would exceed 2000 pixels in
    either dimension. Any other file is read with ``cv2.imread``.

    Args:
        image_file: path of the file to load.

    Returns:
        A list of BGR numpy arrays. The list is empty when an image file
        cannot be decoded.
    """
    # Compare the real extension case-insensitively so "X.PDF" is not
    # handed to cv2.imread.
    if os.path.splitext(image_file)[1].lower() == '.pdf':
        import fitz
        from PIL import Image

        imgs = []
        with fitz.open(image_file) as pdf:
            # Newer PyMuPDF releases use snake_case names; fall back to the
            # legacy camelCase ones used by older releases.
            if hasattr(pdf, 'page_count'):
                page_count = pdf.page_count
            else:
                page_count = pdf.pageCount

            for pg in range(page_count):
                page = pdf[pg]
                render = getattr(page, 'get_pixmap', None) or page.getPixmap

                pm = render(matrix=fitz.Matrix(2, 2), alpha=False)

                # if width or height > 2000 pixels, don't enlarge the image
                if pm.width > 2000 or pm.height > 2000:
                    pm = render(matrix=fitz.Matrix(1, 1), alpha=False)

                img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
                imgs.append(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
        return imgs

    img = cv2.imread(image_file, cv2.IMREAD_COLOR)
    # cv2.imread returns None for an unreadable file; report that as an
    # empty list rather than leaving the result unbound.
    return [] if img is None else [img]
|
|
|
|
|
|
class Worker(QThread):
    """Background thread that converts a batch of files to documents.

    For every input file the pages are loaded with ``readImage``, passed to
    the predictor of the selected language, and the results are written
    below ``outputDir/<file name without extension>``.

    Signals:
        progressBarValue(int): emitted after each input file with the
            number of files handled so far (skipped files included).
        endsignal(): emitted once the batch loop is over.
    """

    progressBarValue = Signal(int)
    endsignal = Signal()
    # Checked before each file; a false value stops the batch early.
    loopFlag = True

    def __init__(self, predictors, save_pdf, vis_font_path):
        """Store the shared configuration.

        Args:
            predictors: mapping from language code ('EN' / 'CN') to a
                callable that takes an image and returns ``(res, time_dict)``.
            save_pdf: forwarded to ``convert_info_docx``.
            vis_font_path: font file forwarded to ``draw_structure_result``.
        """
        super(Worker, self).__init__()
        self.predictors = predictors
        self.save_pdf = save_pdf
        self.vis_font_path = vis_font_path
        self.lang = 'EN'
        self.imagePaths = []
        self.outputDir = None
        self.setStackSize(1024 * 1024)

    def setImagePath(self, imagePaths):
        """Set the list of input files for the next run."""
        self.imagePaths = imagePaths

    def setLang(self, lang):
        """Select which entry of ``predictors`` the next run uses."""
        self.lang = lang

    def setOutputDir(self, outputDir):
        """Set the directory that receives all results of the next run."""
        self.outputDir = outputDir

    def predictAndSave(self, imgs, img_name):
        """Run the predictor on every page and write the recovered document.

        Args:
            imgs: list of page images belonging to one input file.
            img_name: name of the result sub-folder inside ``outputDir``.
        """
        if not imgs:
            # Nothing to predict and no page to hand to the recovery step.
            return

        predictor = self.predictors[self.lang]
        all_res = []
        for index, img in enumerate(imgs):
            res, _ = predictor(img)

            # save output
            save_structure_res(res, self.outputDir, img_name)
            draw_img = draw_structure_result(img, res, self.vis_font_path)
            img_save_path = os.path.join(
                self.outputDir, img_name, 'show_{}.jpg'.format(index))
            if res != []:
                cv2.imwrite(img_save_path, draw_img)

            # recovery: order the regions of this page using its width
            all_res += sorted_layout_boxes(res, img.shape[1])

        try:
            # `img` is the last page of the file at this point.
            convert_info_docx(img, all_res, self.outputDir, img_name, self.save_pdf)
        except Exception as ex:
            # Best effort: a failed recovery must not abort the whole batch.
            print("error in layout recovery image:{}, err msg: {}".format(
                img_name, ex))

        print('result save to {}'.format(self.outputDir))

    def run(self):
        """Process ``imagePaths`` in order, then emit ``endsignal``."""
        try:
            os.makedirs(self.outputDir, exist_ok=True)
            for findex, image_file in enumerate(self.imagePaths, start=1):
                if not self.loopFlag:
                    break

                imgs = readImage(image_file)
                if imgs:
                    # splitext keeps dots inside the name, so "a.b.pdf" and
                    # "a.c.pdf" do not end up in the same "a" folder.
                    img_name = os.path.splitext(os.path.basename(image_file))[0]
                    os.makedirs(os.path.join(self.outputDir, img_name), exist_ok=True)
                    self.predictAndSave(imgs, img_name)

                # Count unreadable files as handled too, so the progress
                # bar can still reach its maximum.
                self.progressBarValue.emit(findex)

            self.endsignal.emit()
            # NOTE(review): exec() keeps this thread's event loop running after
            # the batch, so the thread stays "running" until quit() is called.
            # Qt documents start() as a no-op on a running thread -- confirm
            # the owner stops the loop before starting another batch.
            self.exec()
        except Exception as e:
            print(e)
            raise
|
|
|
|
|
|
class APP_Image2Doc(QWidget):
    """Main window: pick files, convert them in a worker thread, show results.

    On construction the window downloads and unpacks the English and
    Chinese models, builds one predictor per language and creates a single
    ``Worker`` that is reused for every conversion.
    """

    def __init__(self):
        super().__init__()
        self.setFixedHeight(90)
        self.setFixedWidth(400)

        # settings
        self.imagePaths = []
        # self.screenShotWg = ScreenShotWidget()
        self.screenShot = None
        self.save_pdf = False
        self.output_dir = None
        self.vis_font_path = os.path.join(root, "doc", "fonts", "simfang.ttf")

        # ProgressBar
        self.pb = QProgressBar()
        self.pb.setRange(0, 100)
        self.pb.setValue(0)

        # Build the user interface
        self.setupUi()

        # Download the models
        self.downloadModels(URLs_EN)
        self.downloadModels(URLs_CN)

        # Initialise the models
        predictors = {
            'EN': self.initPredictor('EN'),
            'CN': self.initPredictor('CN'),
        }

        # Set up the worker thread. It is reused for every conversion, so it
        # must not be scheduled for deletion when a run finishes.
        self._thread = Worker(predictors, self.save_pdf, self.vis_font_path)
        self._thread.progressBarValue.connect(self.handleProgressBarSingal)
        self._thread.endsignal.connect(self.handleEndsignalSignal)
        self.time_start = 0  # save start time

    def setupUi(self):
        """Create the buttons, progress bar and time label and lay them out."""
        self.setObjectName("MainWindow")
        self.setWindowTitle(__APPNAME__ + " " + __VERSION__)

        layout = QGridLayout()

        self.openFileButton = QPushButton("打开文件")
        self.openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png")))
        layout.addWidget(self.openFileButton, 0, 0, 1, 1)
        self.openFileButton.clicked.connect(self.handleOpenFileSignal)

        # screenShotButton = QPushButton("截图识别")
        # layout.addWidget(screenShotButton, 0, 1, 1, 1)
        # screenShotButton.clicked.connect(self.screenShotSlot)
        # screenShotButton.setEnabled(False) # temporarily disabled

        self.startCNButton = QPushButton("中文转换")
        self.startCNButton.setIcon(QIcon(QPixmap("./icons/chinese.png")))
        layout.addWidget(self.startCNButton, 0, 1, 1, 1)
        self.startCNButton.clicked.connect(
            functools.partial(self.handleStartSignal, 'CN'))

        self.startENButton = QPushButton("英文转换")
        self.startENButton.setIcon(QIcon(QPixmap("./icons/english.png")))
        layout.addWidget(self.startENButton, 0, 2, 1, 1)
        self.startENButton.clicked.connect(
            functools.partial(self.handleStartSignal, 'EN'))

        self.showResultButton = QPushButton("显示结果")
        self.showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png")))
        layout.addWidget(self.showResultButton, 0, 3, 1, 1)
        self.showResultButton.clicked.connect(self.handleShowResultSignal)

        # ProgressBar
        layout.addWidget(self.pb, 2, 0, 1, 4)
        # time estimate label
        self.timeEstLabel = QLabel(
            ("Time Left: --"))
        layout.addWidget(self.timeEstLabel, 3, 0, 1, 4)

        self.setLayout(layout)

    def downloadModels(self, URLs):
        """Download each model archive and unpack its model files.

        Archives are stored in ``<root>/inference``; the model files of each
        archive are written to ``<root>/inference/<name>``. Failures are
        printed and the remaining archives are still processed.

        Args:
            URLs: mapping from target folder name to archive URL.
        """
        # using custom model
        tar_file_name_list = [
            'inference.pdiparams',
            'inference.pdiparams.info',
            'inference.pdmodel',
            'model.pdiparams',
            'model.pdiparams.info',
            'model.pdmodel'
        ]
        model_path = os.path.join(root, 'inference')
        os.makedirs(model_path, exist_ok=True)

        # download and unzip models
        for name, url in URLs.items():
            print("Try downloading file: {}".format(url))
            tarname = url.split('/')[-1]
            tarpath = os.path.join(model_path, tarname)
            if os.path.exists(tarpath):
                print("File have already exist. skip")
            else:
                try:
                    download_with_progressbar(url, tarpath)
                except Exception as e:
                    print("Error occurred when downloading file, error message:")
                    print(e)
                    # In case the helper left a partial file behind: remove
                    # it, otherwise the existence check above would skip the
                    # download on every later launch.
                    if os.path.exists(tarpath):
                        try:
                            os.remove(tarpath)
                        except OSError as remove_err:
                            print(remove_err)
                    continue

            # unzip model tar
            try:
                with tarfile.open(tarpath, 'r') as tarObj:
                    storage_dir = os.path.join(model_path, name)
                    os.makedirs(storage_dir, exist_ok=True)
                    for member in tarObj.getmembers():
                        matches = [
                            candidate for candidate in tar_file_name_list
                            if candidate in member.name
                        ]
                        if not matches:
                            continue
                        # 'x.pdiparams' is a substring of 'x.pdiparams.info';
                        # the longest match is the actual file name.
                        filename = max(matches, key=len)
                        member_file = tarObj.extractfile(member)
                        if member_file is None:
                            # extractfile() gives None for non-file members.
                            continue
                        with member_file, open(
                                os.path.join(storage_dir, filename),
                                'wb') as f:
                            f.write(member_file.read())
            except Exception as e:
                print("Error occurred when unziping file, error message:")
                print(e)

    def initPredictor(self, lang='EN'):
        """Build a ``StructureSystem`` configured for one language.

        Args:
            lang: 'EN' or 'CN'.

        Returns:
            The ``StructureSystem`` created from the assembled arguments.

        Raises:
            ValueError: if ``lang`` is neither 'EN' nor 'CN'.
        """
        # Folder names below match the keys of URLs_EN / URLs_CN, i.e. the
        # folders downloadModels() unpacks the archives into.
        if lang == 'EN':
            det_dir = "en_PP-OCRv3_det_infer"
            rec_dir = "en_PP-OCRv3_rec_infer"
            table_dir = "en_ppstructure_mobile_v2.0_SLANet_infer"
            layout_dir = "picodet_lcnet_x1_0_fgd_layout_infer"
            lang_dict = DICT_EN
        elif lang == 'CN':
            det_dir = "cn_PP-OCRv3_det_infer"
            rec_dir = "cn_PP-OCRv3_rec_infer"
            table_dir = "cn_ppstructure_mobile_v2.0_SLANet_infer"
            layout_dir = "picodet_lcnet_x1_0_fgd_layout_cdla_infer"
            lang_dict = DICT_CN
        else:
            raise ValueError("Unsupported language")

        # init predictor args
        args = parse_args()
        args.table_max_len = 488
        args.ocr = True
        args.recovery = True
        args.save_pdf = self.save_pdf
        args.table_char_dict_path = os.path.join(
            root, "ppocr", "utils", "dict", "table_structure_dict.txt")

        # Where the unpacked models are looked up
        args.det_model_dir = os.path.join(root, "inference", det_dir)
        args.rec_model_dir = os.path.join(root, "inference", rec_dir)
        args.table_model_dir = os.path.join(root, "inference", table_dir)
        args.layout_model_dir = os.path.join(root, "inference", layout_dir)
        args.output = os.path.join(root, "output")  # where results are saved

        args.rec_char_dict_path = os.path.join(
            root, "ppocr", "utils", lang_dict['rec_char_dict_path'])
        args.layout_dict_path = os.path.join(
            root, "ppocr", "utils", "dict", "layout_dict",
            lang_dict['layout_dict_path'])

        # init predictor
        return StructureSystem(args)

    def handleOpenFileSignal(self):
        '''
        Let the user pick one or more image / PDF files.
        '''
        selectedFiles = QFileDialog.getOpenFileNames(
            self,
            "多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0]
        if len(selectedFiles) > 0:
            self.imagePaths = selectedFiles
            self.screenShot = None  # discard screenshot temp image
            self.pb.setRange(0, len(self.imagePaths))
            self.pb.setValue(0)

    # def screenShotSlot(self):
    #     '''
    #     Only one of "selected files" and "screenshot" can be converted at a
    #     time, and only one screenshot can be converted at a time.
    #     '''
    #     self.screenShotWg.start()
    #     if self.screenShotWg.captureImage:
    #         self.screenShot = self.screenShotWg.captureImage
    #         self.imagePaths.clear() # discard openfile temp list
    #         self.pb.setRange(0, 1)
    #         self.pb.setValue(0)

    def handleStartSignal(self, lang):
        """Start converting the selected files with the ``lang`` predictor.

        Args:
            lang: 'EN' or 'CN', forwarded to the worker.
        """
        if self.screenShot:  # for screenShot
            # NOTE(review): this class defines no predictAndSave(); the branch
            # is only reachable once the disabled screenshot feature sets
            # self.screenShot -- fix the call when re-enabling it.
            img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime())
            image = QImageToCvMat(self.screenShot)
            self.predictAndSave(image, img_name, lang)
            # update Progress Bar
            self.pb.setValue(1)
            QMessageBox.information(self,
                                    u'Information', "文档提取完成")
        elif len(self.imagePaths) > 0:  # for image file selection
            # Must set image path list and language before start.
            # Results go to an "output" folder next to the first input file.
            self.output_dir = os.path.join(
                os.path.dirname(self.imagePaths[0]), "output")
            self._thread.setOutputDir(self.output_dir)
            self._thread.setImagePath(self.imagePaths)
            self._thread.setLang(lang)
            # disable buttons
            self.openFileButton.setEnabled(False)
            self.startCNButton.setEnabled(False)
            self.startENButton.setEnabled(False)
            # The worker parks in its event loop after a batch. start() does
            # nothing on a thread that is still running, so make sure the
            # previous run has fully returned first.
            if self._thread.isRunning():
                self._thread.quit()
                self._thread.wait()
            self.time_start = time.time()  # log start time
            # Start the worker thread
            self._thread.start()
            QMessageBox.information(self,
                                    u'Information', "开始转换")
        else:
            QMessageBox.warning(self,
                                u'Information', "请选择要识别的文件或截图")

    def handleShowResultSignal(self):
        """Open the output folder in the platform's file browser."""
        if self.output_dir is None:
            return
        if not os.path.exists(self.output_dir):
            QMessageBox.information(self,
                                    u'Information', "输出文件不存在")
            return

        system = platform.system()
        if system == 'Windows':
            os.startfile(self.output_dir)
            return

        import subprocess

        # Pass the path as a separate argument (no shell), so spaces or
        # shell metacharacters in the folder name cannot break the command.
        opener = 'open' if system == 'Darwin' else 'xdg-open'
        try:
            subprocess.Popen([opener, os.path.normpath(self.output_dir)])
        except OSError as err:
            print("Failed to open {}: {}".format(self.output_dir, err))

    def handleProgressBarSingal(self, i):
        """Update the progress bar and the remaining-time estimate.

        Args:
            i: number of files handled so far.
        """
        self.pb.setValue(i)
        if i <= 0:
            # No file finished yet: no average to extrapolate from.
            return
        # calculate time left of recognition
        lenbar = self.pb.maximum()
        # Use average time to prevent time fluctuations
        avg_time = (time.time() - self.time_start) / i
        # Remove microseconds
        time_left = str(datetime.timedelta(seconds=avg_time * (lenbar - i))).split(".")[0]
        self.timeEstLabel.setText(f"Time Left: {time_left}")  # show time left

    def handleEndsignalSignal(self):
        """Re-enable the buttons and tell the user the batch is done."""
        # Stop the worker's event loop so the thread actually finishes and
        # can be started again for the next batch.
        self._thread.quit()
        # enable buttons
        self.openFileButton.setEnabled(True)
        self.startCNButton.setEnabled(True)
        self.startENButton.setEnabled(True)
        QMessageBox.information(self, u'Information', "转换结束")
|
|
|
|
|
|
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    application = QApplication(sys.argv)

    main_window = APP_Image2Doc()  # create the window object
    main_window.show()  # show the window

    # Handle pending events once before entering the main loop.
    QApplication.processEvents()
    exit_code = application.exec()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|