move imgs to doc
@@ -6,12 +6,12 @@ Global:
   save_model_dir: ./output/re_layoutxlm/
   save_epoch_step: 2000
   # evaluation is run every 10 iterations after the 0th iteration
-  eval_batch_step: [ 0, 38 ]
+  eval_batch_step: [ 0, 19 ]
   cal_metric_during_train: False
-  pretrained_model: &pretrained_model layoutxlm-base-uncased
+  pretrained_model: &pretrained_model layoutxlm-base-uncased # This field can only be changed by modifying the configuration file
   save_inference_dir:
   use_visualdl: False
-  infer_img: ppstructure/vqa/images/input/zh_val_21.jpg
+  infer_img: doc/vqa/input/zh_val_21.jpg
   save_res_path: ./output/re/
 
 Architecture:
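A note on the first changed key: in PaddleOCR configs `eval_batch_step` is a `[start_iter, interval]` pair, so `[ 0, 19 ]` runs evaluation every 19 iterations once training passes iteration 0 (the "every 10 iterations" comment above it is stale). A minimal sketch of how such a gate is typically checked in a training loop; the loop and variable names are illustrative, not PaddleOCR's:

```python
# Illustrative training-loop gate for eval_batch_step = [start_iter, interval].
start_iter, interval = 0, 19

for global_step in range(1, 101):
    # ... train on one batch here ...
    if global_step > start_iter and (global_step - start_iter) % interval == 0:
        print(f"step {global_step}: run evaluation")  # fires at 19, 38, 57, ...
```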
@@ -8,10 +8,10 @@ Global:
   # evaluation is run every 10 iterations after the 0th iteration
   eval_batch_step: [ 0, 19 ]
   cal_metric_during_train: False
-  pretrained_model: &pretrained_model layoutlm-base-uncased
+  pretrained_model: &pretrained_model layoutlm-base-uncased # This field can only be changed by modifying the configuration file
   save_inference_dir:
   use_visualdl: False
-  infer_img: ppstructure/vqa/images/input/zh_val_0.jpg
+  infer_img: doc/vqa/input/zh_val_0.jpg
   save_res_path: ./output/ser/predicts_layoutlm.txt
 
 Architecture:
@@ -8,10 +8,10 @@ Global:
   # evaluation is run every 10 iterations after the 0th iteration
   eval_batch_step: [ 0, 19 ]
   cal_metric_during_train: False
-  pretrained_model: &pretrained_model layoutxlm-base-uncased
+  pretrained_model: &pretrained_model layoutxlm-base-uncased # This field can only be changed by modifying the configuration file
   save_inference_dir:
   use_visualdl: False
-  infer_img: ppstructure/vqa/images/input/zh_val_42.jpg
+  infer_img: doc/vqa/input/zh_val_42.jpg
   save_res_path: ./output/ser
 
 Architecture:
(Binary image diffs: eight input images were moved without modification; dimensions and sizes are unchanged at 1.5 MiB, 1.4 MiB, 1.1 MiB, 1.8 MiB, 1.1 MiB, 1005 KiB, 1.2 MiB, and 1.6 MiB.)
@@ -787,7 +787,7 @@ class SARLabelEncode(BaseRecLabelEncode):
 
 class VQATokenLabelEncode(object):
     """
-    基于NLP的标签编码
+    Label encode for NLP VQA methods
     """
 
     def __init__(self,
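The docstring change above only swaps Chinese for its English translation; the class itself turns OCR results plus annotations into token-level labels for VQA/SER training. A hedged sketch of the general idea, not PaddleOCR's implementation (the tokenizer API, helper name, and label scheme here are assumed for illustration):

```python
# Sketch: expand one label per OCR word into one label per subword token,
# which is the usual shape a LayoutLM-style SER model trains on.
def encode_token_labels(words, word_labels, tokenizer, label2id):
    token_ids, label_ids = [], []
    for word, label in zip(words, word_labels):
        sub_ids = tokenizer.encode(word)  # assumed tokenizer API
        token_ids.extend(sub_ids)
        # repeat the word-level label for every subword it produced
        label_ids.extend([label2id[label]] * len(sub_ids))
    return token_ids, label_ids
```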
@@ -122,7 +122,7 @@ class SimpleDataSet(Dataset):
             self.logger.error(
                 "When parsing line {}, error happened with msg: {}".format(
                     data_line, traceback.format_exc()))
-            # outs = None
+            outs = None
         if outs is None:
             # during evaluation, we should fix the idx to get same results for many times of evaluation.
             rnd_idx = np.random.randint(self.__len__(
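The functional change here is uncommenting `outs = None`: when a line fails to parse, the result is now cleared so the `if outs is None` fallback actually triggers and another sample is drawn. A condensed sketch of that pattern (`parse_line` is a hypothetical stand-in for the dataset's transform pipeline):

```python
import traceback
import numpy as np

def __getitem__(self, idx):
    data_line = self.data_lines[idx]
    try:
        outs = self.parse_line(data_line)  # hypothetical stand-in for the transforms
    except Exception:
        self.logger.error(
            "When parsing line {}, error happened with msg: {}".format(
                data_line, traceback.format_exc()))
        outs = None  # the fix: clear the result so the fallback below runs
    if outs is None:
        # draw a replacement sample; a seeded RNG keeps repeated evaluation stable
        rnd_idx = np.random.randint(self.__len__())
        return self.__getitem__(rnd_idx)
    return outs
```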
@@ -1,4 +1,4 @@
-# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ class VQAReTokenLayoutLMPostProcess(object):
         entity_idx_dict_batch = kwargs['entity_idx_dict_batch']
         pred_relations = preds['pred_relations']
 
-        # 进行 relations 到 ocr信息的转换
+        # merge relations and ocr info
         results = []
         for pred_relation, ser_result, entity_idx_dict in zip(
                 pred_relations, ser_results, entity_idx_dict_batch):
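The reworded comment marks where predicted relations are mapped back onto OCR results: per sample, `entity_idx_dict` translates the model's entity indices into positions in the SER output, so every predicted head/tail pair can be reported with its original text and box. A hedged sketch of that merge; the relation fields are assumptions, not the exact PaddleOCR structure:

```python
# Sketch: attach OCR-level SER info to each predicted (head, tail) relation.
results = []
for pred_relation, ser_result, entity_idx_dict in zip(
        pred_relations, ser_results, entity_idx_dict_batch):
    merged = []
    for head_idx, tail_idx in pred_relation:  # assumed index-pair format
        merged.append({
            "head": ser_result[entity_idx_dict[head_idx]],  # e.g. a QUESTION entity
            "tail": ser_result[entity_idx_dict[tail_idx]],  # e.g. its linked ANSWER
        })
    results.append(merged)
```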
@@ -34,7 +34,7 @@ The DOC-VQA algorithms in PP-Structure are built on the PaddleNLP natural language processing library
 
 ### 2.1 SER
 
- | 
+ | 
 ---|---
 
 Boxes of different colors in the figure denote different categories; for the XFUN dataset there are 3 categories: `QUESTION`, `ANSWER`, and `HEADER`
@@ -48,7 +48,7 @@ The DOC-VQA algorithms in PP-Structure are built on the PaddleNLP natural language processing library
 
 ### 2.2 RE
 
- | 
+ | 
 ---|---
 
@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/ser/layoutxlm.yml -o
 Run the following command to complete the cascaded `OCR engine + SER` prediction:
 
 ```shell
-CUDA_VISIBLE_DEVICES=0 python3 tools/infer_vqa_token_ser.py -c configs/vqa/ser/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ Global.infer_img=ppstructure/vqa/images/input/zh_val_42.jpg
+CUDA_VISIBLE_DEVICES=0 python3 tools/infer_vqa_token_ser.py -c configs/vqa/ser/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/ Global.infer_img=doc/vqa/input/zh_val_42.jpg
 ```
 
 The visualized prediction image and the prediction text file are saved under the directory configured by the `config.Global.save_res_path` field; the prediction text file is named `infer_results.txt`.
@@ -219,7 +219,7 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/vqa/re/layoutxlm.yml -o
 Run the following command to complete the cascaded `OCR engine + SER + RE` prediction:
 ```shell
 export CUDA_VISIBLE_DEVICES=0
-python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_re_pretrained/ Global.infer_img=ppstructure/vqa/images/input/zh_val_21.jpg -c_ser configs/vqa/ser/layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/
+python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Architecture.Backbone.checkpoints=PP-Layout_v1.0_re_pretrained/ Global.infer_img=doc/vqa/input/zh_val_21.jpg -c_ser configs/vqa/ser/layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=PP-Layout_v1.0_ser_pretrained/
 ```
 
 The visualized prediction image and the prediction text file are saved under the directory configured by the `config.Global.save_res_path` field; the prediction text file is named `infer_results.txt`.
@@ -104,7 +104,7 @@ def make_input(ser_inputs, ser_results):
 
     ser_inputs[8] = entities_batch
     ser_inputs.append(relations_batch)
-
+    # remove ocr_info segment_offset_id and label in ser input
    ser_inputs.pop(7)
    ser_inputs.pop(6)
    ser_inputs.pop(1)
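Worth noting for the hunk above: the pops run in descending index order (7, 6, then 1) because `list.pop` shifts every later element left, so removing the highest index first keeps the remaining target positions valid. A tiny standalone illustration:

```python
# Popping 7, 6, 1 in that order never invalidates a pending index.
items = list("abcdefghij")
for i in (7, 6, 1):
    items.pop(i)
print(items)  # ['a', 'c', 'd', 'e', 'f', 'i', 'j']
```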