commit
533f276a9e
|
@ -4,4 +4,4 @@ seqeval
|
|||
pypandoc
|
||||
attrdict
|
||||
python_docx
|
||||
https://paddleocr.bj.bcebos.com/ppstructure/whl/paddlenlp-2.3.0.dev0-py3-none-any.whl
|
||||
paddlenlp>=2.4.1
|
||||
|
|
|
@ -68,6 +68,7 @@ def build_pre_process_list(args):
|
|||
|
||||
class TableStructurer(object):
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
self.use_onnx = args.use_onnx
|
||||
pre_process_list = build_pre_process_list(args)
|
||||
if args.table_algorithm not in ['TableMaster']:
|
||||
|
@ -89,8 +90,31 @@ class TableStructurer(object):
|
|||
self.predictor, self.input_tensor, self.output_tensors, self.config = \
|
||||
utility.create_predictor(args, 'table', logger)
|
||||
|
||||
if args.benchmark:
|
||||
import auto_log
|
||||
pid = os.getpid()
|
||||
gpu_id = utility.get_infer_gpuid()
|
||||
self.autolog = auto_log.AutoLogger(
|
||||
model_name="table",
|
||||
model_precision=args.precision,
|
||||
batch_size=1,
|
||||
data_shape="dynamic",
|
||||
save_path=None, #args.save_log_path,
|
||||
inference_config=self.config,
|
||||
pids=pid,
|
||||
process_name=None,
|
||||
gpu_ids=gpu_id if args.use_gpu else None,
|
||||
time_keys=[
|
||||
'preprocess_time', 'inference_time', 'postprocess_time'
|
||||
],
|
||||
warmup=0,
|
||||
logger=logger)
|
||||
|
||||
def __call__(self, img):
|
||||
starttime = time.time()
|
||||
if self.args.benchmark:
|
||||
self.autolog.times.start()
|
||||
|
||||
ori_im = img.copy()
|
||||
data = {'image': img}
|
||||
data = transform(data, self.preprocess_op)
|
||||
|
@ -99,6 +123,8 @@ class TableStructurer(object):
|
|||
return None, 0
|
||||
img = np.expand_dims(img, axis=0)
|
||||
img = img.copy()
|
||||
if self.args.benchmark:
|
||||
self.autolog.times.stamp()
|
||||
if self.use_onnx:
|
||||
input_dict = {}
|
||||
input_dict[self.input_tensor.name] = img
|
||||
|
@ -110,6 +136,8 @@ class TableStructurer(object):
|
|||
for output_tensor in self.output_tensors:
|
||||
output = output_tensor.copy_to_cpu()
|
||||
outputs.append(output)
|
||||
if self.args.benchmark:
|
||||
self.autolog.times.stamp()
|
||||
|
||||
preds = {}
|
||||
preds['structure_probs'] = outputs[1]
|
||||
|
@ -125,6 +153,8 @@ class TableStructurer(object):
|
|||
'<html>', '<body>', '<table>'
|
||||
] + structure_str_list + ['</table>', '</body>', '</html>']
|
||||
elapse = time.time() - starttime
|
||||
if self.args.benchmark:
|
||||
self.autolog.times.end(stamp=True)
|
||||
return (structure_str_list, bbox_list), elapse
|
||||
|
||||
|
||||
|
@ -164,6 +194,8 @@ def main(args):
|
|||
total_time += elapse
|
||||
count += 1
|
||||
logger.info("Predict time of {}: {}".format(image_file, elapse))
|
||||
if args.benchmark:
|
||||
table_structurer.autolog.report()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
__dir__ = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(__dir__)
|
||||
|
@ -58,48 +57,28 @@ def expand(pix, det_box, shape):
|
|||
|
||||
class TableSystem(object):
|
||||
def __init__(self, args, text_detector=None, text_recognizer=None):
|
||||
self.args = args
|
||||
if not args.show_log:
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
self.text_detector = predict_det.TextDetector(
|
||||
args) if text_detector is None else text_detector
|
||||
self.text_recognizer = predict_rec.TextRecognizer(
|
||||
args) if text_recognizer is None else text_recognizer
|
||||
|
||||
args.benchmark = False
|
||||
self.text_detector = predict_det.TextDetector(copy.deepcopy(
|
||||
args)) if text_detector is None else text_detector
|
||||
self.text_recognizer = predict_rec.TextRecognizer(copy.deepcopy(
|
||||
args)) if text_recognizer is None else text_recognizer
|
||||
args.benchmark = True
|
||||
self.table_structurer = predict_strture.TableStructurer(args)
|
||||
if args.table_algorithm in ['TableMaster']:
|
||||
self.match = TableMasterMatcher()
|
||||
else:
|
||||
self.match = TableMatch(filter_ocr_result=True)
|
||||
|
||||
self.benchmark = args.benchmark
|
||||
self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
|
||||
args, 'table', logger)
|
||||
if args.benchmark:
|
||||
import auto_log
|
||||
pid = os.getpid()
|
||||
gpu_id = utility.get_infer_gpuid()
|
||||
self.autolog = auto_log.AutoLogger(
|
||||
model_name="table",
|
||||
model_precision=args.precision,
|
||||
batch_size=1,
|
||||
data_shape="dynamic",
|
||||
save_path=None, #args.save_log_path,
|
||||
inference_config=self.config,
|
||||
pids=pid,
|
||||
process_name=None,
|
||||
gpu_ids=gpu_id if args.use_gpu else None,
|
||||
time_keys=[
|
||||
'preprocess_time', 'inference_time', 'postprocess_time'
|
||||
],
|
||||
warmup=0,
|
||||
logger=logger)
|
||||
|
||||
def __call__(self, img, return_ocr_result_in_table=False):
|
||||
result = dict()
|
||||
time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0}
|
||||
start = time.time()
|
||||
|
||||
structure_res, elapse = self._structure(copy.deepcopy(img))
|
||||
result['cell_bbox'] = structure_res[1].tolist()
|
||||
time_dict['table'] = elapse
|
||||
|
@ -118,24 +97,16 @@ class TableSystem(object):
|
|||
toc = time.time()
|
||||
time_dict['match'] = toc - tic
|
||||
result['html'] = pred_html
|
||||
if self.benchmark:
|
||||
self.autolog.times.end(stamp=True)
|
||||
end = time.time()
|
||||
time_dict['all'] = end - start
|
||||
if self.benchmark:
|
||||
self.autolog.times.stamp()
|
||||
return result, time_dict
|
||||
|
||||
def _structure(self, img):
|
||||
if self.benchmark:
|
||||
self.autolog.times.start()
|
||||
structure_res, elapse = self.table_structurer(copy.deepcopy(img))
|
||||
return structure_res, elapse
|
||||
|
||||
def _ocr(self, img):
|
||||
h, w = img.shape[:2]
|
||||
if self.benchmark:
|
||||
self.autolog.times.stamp()
|
||||
dt_boxes, det_elapse = self.text_detector(copy.deepcopy(img))
|
||||
dt_boxes = sorted_boxes(dt_boxes)
|
||||
|
||||
|
@ -233,12 +204,13 @@ def main(args):
|
|||
f_html.close()
|
||||
|
||||
if args.benchmark:
|
||||
text_sys.autolog.report()
|
||||
table_sys.table_structurer.autolog.report()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
if args.use_mp:
|
||||
import subprocess
|
||||
p_list = []
|
||||
total_process_num = args.total_process_num
|
||||
for process_id in range(total_process_num):
|
||||
|
|
|
@ -7,14 +7,14 @@ Global.auto_cast:fp32
|
|||
Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
|
||||
Global.save_model_dir:./output/
|
||||
Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
|
||||
Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
|
||||
Architecture.Backbone.pretrained:pretrain_models/ser_LayoutXLM_xfun_zh
|
||||
train_model_name:latest
|
||||
train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
|
||||
null:null
|
||||
##
|
||||
trainer:pact_train
|
||||
norm_train:null
|
||||
pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
|
||||
pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.eval_batch_step=[2000,10]
|
||||
fpgm_train:null
|
||||
distill_train:null
|
||||
null:null
|
||||
|
|
|
@ -34,7 +34,7 @@ distill_export:null
|
|||
export1:null
|
||||
export2:null
|
||||
##
|
||||
infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer
|
||||
infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_infer
|
||||
infer_export:null
|
||||
infer_quant:True
|
||||
inference:ppstructure/table/predict_table.py --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table
|
||||
|
|
|
@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
|
|||
python_name=${array[0]}
|
||||
${python_name} -m pip install -r requirements.txt
|
||||
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
|
||||
${python_name} -m pip install paddleslim==2.3.4
|
||||
# pretrain lite train data
|
||||
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
|
||||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
|
||||
|
@ -260,7 +261,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
|
|||
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
|
||||
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
|
||||
fi
|
||||
if [ ${model_name} == "layoutxlm_ser" ]; then
|
||||
if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
|
||||
${python_name} -m pip install -r ppstructure/kie/requirements.txt
|
||||
${python_name} -m pip install opencv-python -U
|
||||
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
|
||||
|
|
Loading…
Reference in New Issue