From a6e6208d37e2dfe9b2fcb16a731a2f417511fc40 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Sat, 20 Aug 2022 07:18:30 +0000
Subject: [PATCH 01/13] update model size
---
ppstructure/docs/models_list.md | 4 ++--
ppstructure/docs/models_list_en.md | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/ppstructure/docs/models_list.md b/ppstructure/docs/models_list.md
index ef2994cab..0b2f41deb 100644
--- a/ppstructure/docs/models_list.md
+++ b/ppstructure/docs/models_list.md
@@ -34,8 +34,8 @@
|模型名称|模型简介|推理模型大小|下载地址|
| --- | --- | --- | --- |
-|en_ppocr_mobile_v2.0_table_structure|基于TableRec-RARE在PubTabNet数据集上训练的英文表格识别模型|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
-|en_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的英文表格识别模型|9M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|en_ppocr_mobile_v2.0_table_structure|基于TableRec-RARE在PubTabNet数据集上训练的英文表格识别模型|6.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
+|en_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的英文表格识别模型|9.2M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
diff --git a/ppstructure/docs/models_list_en.md b/ppstructure/docs/models_list_en.md
index 64a7cdebc..91f3286bf 100644
--- a/ppstructure/docs/models_list_en.md
+++ b/ppstructure/docs/models_list_en.md
@@ -35,8 +35,8 @@ If you need to use other OCR models, you can download the model in [PP-OCR model
|model| description |inference model size|download|
| --- |-----------------------------------------------------------------------------| --- | --- |
-|en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
-|en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |6.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
+|en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9.2M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
From 1cc7ad34cae92f92ab63b75a59454ac220ef2b85 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Sat, 20 Aug 2022 07:46:42 +0000
Subject: [PATCH 02/13] update layout dict in whl
---
paddleocr.py | 3 ++-
tools/infer/utility.py | 19 +++++++++++++------
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/paddleocr.py b/paddleocr.py
index 8e34c4fbc..f6318ad20 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -289,7 +289,8 @@ MODEL_URLS = {
'ch': {
'url':
'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar',
- 'dict_path': 'ppocr/utils/dict/layout_publaynet_dict.txt'
+ 'dict_path':
+ 'ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt'
}
}
}
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 1eebc73f3..1355ca62e 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -181,14 +181,21 @@ def create_predictor(args, mode, logger):
return sess, sess.get_inputs()[0], None, None
else:
- model_file_path = model_dir + "/inference.pdmodel"
- params_file_path = model_dir + "/inference.pdiparams"
+ file_names = ['model', 'inference']
+ for file_name in file_names:
+ model_file_path = '{}/{}.pdmodel'.format(model_dir, file_name)
+ params_file_path = '{}/{}.pdiparams'.format(model_dir, file_name)
+ if os.path.exists(model_file_path) and os.path.exists(
+ params_file_path):
+ break
if not os.path.exists(model_file_path):
- raise ValueError("not find model file path {}".format(
- model_file_path))
+ raise ValueError(
+ "not find model.pdmodel or inference.pdmodel in {}".format(
+ model_dir))
if not os.path.exists(params_file_path):
- raise ValueError("not find params file path {}".format(
- params_file_path))
+ raise ValueError(
+ "not find model.pdiparams or inference.pdiparams in {}".format(
+ model_dir))
config = inference.Config(model_file_path, params_file_path)
From f6ba46b396c6be405184824518663f033206f4b2 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Sat, 20 Aug 2022 08:40:40 +0000
Subject: [PATCH 03/13] update metric
---
ppstructure/table/README.md | 2 +-
ppstructure/table/README_ch.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md
index 3732a89c5..3bf868561 100644
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
@@ -33,7 +33,7 @@ We evaluated the algorithm on the PubTabNet[1] eval dataset, and the
|Method|Acc|[TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src)|Speed|
| --- | --- | --- | ---|
| EDD[2] |x| 88.3 |x|
-| TableRec-RARE(ours) |73.8%| 95.3% |1550ms|
+| TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
| SLANet(ours) | 76.2%| 95.85% |766ms|
The performance indicators are explained as follows:
diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md
index cc73f8bce..cdbea5810 100644
--- a/ppstructure/table/README_ch.md
+++ b/ppstructure/table/README_ch.md
@@ -39,7 +39,7 @@
|算法|Acc|[TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src)|Speed|
| --- | --- | --- | ---|
| EDD[2] |x| 88.3% |x|
-| TableRec-RARE(ours) |73.8%| 95.3% |1550ms|
+| TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
| SLANet(ours) | 76.2%| 95.85% |766ms|
性能指标解释如下:
From 40a3a1cfc21f95bf68be4b000356f7798a11aa83 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Sat, 20 Aug 2022 08:48:01 +0000
Subject: [PATCH 04/13] update metric
---
ppstructure/table/README.md | 2 +-
ppstructure/table/README_ch.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md
index 3bf868561..a5d0da3cc 100644
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
@@ -34,7 +34,7 @@ We evaluated the algorithm on the PubTabNet[1] eval dataset, and the
| --- | --- | --- | ---|
| EDD[2] |x| 88.3 |x|
| TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
-| SLANet(ours) | 76.2%| 95.85% |766ms|
+| SLANet(ours) | 76.31%| 95.89%|766ms|
The performance indicators are explained as follows:
- Acc: The accuracy of the table structure in each image, a wrong token is considered an error.
diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md
index cdbea5810..e83c81bef 100644
--- a/ppstructure/table/README_ch.md
+++ b/ppstructure/table/README_ch.md
@@ -40,7 +40,7 @@
| --- | --- | --- | ---|
| EDD[2] |x| 88.3% |x|
| TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
-| SLANet(ours) | 76.2%| 95.85% |766ms|
+| SLANet(ours) |76.31%| 95.89%|766ms|
性能指标解释如下:
- Acc: 模型对每张图像里表格结构的识别准确率,错一个token就算错误。
From 7e4e87dd6db8704cc209944cbd2152dc78e202eb Mon Sep 17 00:00:00 2001
From: Evezerest <50011306+Evezerest@users.noreply.github.com>
Date: Sun, 21 Aug 2022 18:03:57 +0800
Subject: [PATCH 05/13] cherry pick PRs from community (#7273)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Merge pull request #6824 from ChenNima/release/2.5-kie-save-res
[kie]add write_kie_result to kie infer tool
* Merge pull request #6677 from TonyJiangWJ/release/2.5
修复内存泄露问题
* Update native.cpp (#6650)
fix issue 6640
* Merge pull request #6625 from ynjang/ynjang
update sorted_boxes
* fix DeprecationWarning, (#6604)
DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
* Merge pull request #6585 from maxbachmann/release/2.5
replace GPL licensed components
* Merge pull request #6575 from Eling486/release/2.5
update win doc
* Merge pull request #6477 from MikoyChinese/fix-copy-paste
Fix copy_paste no texts augment.
* Merge pull request #6361 from mohamadmansourX/patch-9
Update README_en.md
Co-authored-by: Double_V
Co-authored-by: shawn <1021362695@qq.com>
Co-authored-by: paopjian <672034519@qq.com>
---
.../android_demo/app/src/main/cpp/native.cpp | 4 +-
.../lite/demo/ocr/OCRPredictorNative.java | 2 +-
deploy/cpp_infer/docs/windows_vs2019_build.md | 6 ++-
deploy/slim/quantization/README_en.md | 2 +-
ppocr/data/imaug/copy_paste.py | 6 ++-
ppocr/metrics/rec_metric.py | 5 +--
.../kie/tools/eval_with_label_end2end.py | 2 +-
.../table/table_metric/table_metric.py | 41 ++-----------------
requirements.txt | 2 +-
tools/infer/predict_system.py | 13 +++---
tools/infer/utility.py | 2 +-
tools/infer_kie.py | 27 +++++++++++-
12 files changed, 56 insertions(+), 56 deletions(-)
diff --git a/deploy/android_demo/app/src/main/cpp/native.cpp b/deploy/android_demo/app/src/main/cpp/native.cpp
index ced932556..4961e5ecf 100644
--- a/deploy/android_demo/app/src/main/cpp/native.cpp
+++ b/deploy/android_demo/app/src/main/cpp/native.cpp
@@ -47,7 +47,7 @@ str_to_cpu_mode(const std::string &cpu_mode) {
std::string upper_key;
std::transform(cpu_mode.cbegin(), cpu_mode.cend(), upper_key.begin(),
::toupper);
- auto index = cpu_mode_map.find(upper_key);
+ auto index = cpu_mode_map.find(upper_key.c_str());
if (index == cpu_mode_map.end()) {
LOGE("cpu_mode not found %s", upper_key.c_str());
return paddle::lite_api::LITE_POWER_HIGH;
@@ -116,4 +116,4 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_release(
ppredictor::OCR_PPredictor *ppredictor =
(ppredictor::OCR_PPredictor *)java_pointer;
delete ppredictor;
-}
\ No newline at end of file
+}
diff --git a/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/OCRPredictorNative.java b/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/OCRPredictorNative.java
index 622da2a3f..41fa183de 100644
--- a/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/OCRPredictorNative.java
+++ b/deploy/android_demo/app/src/main/java/com/baidu/paddle/lite/demo/ocr/OCRPredictorNative.java
@@ -54,7 +54,7 @@ public class OCRPredictorNative {
}
public void destory() {
- if (nativePointer > 0) {
+ if (nativePointer != 0) {
release(nativePointer);
nativePointer = 0;
}
diff --git a/deploy/cpp_infer/docs/windows_vs2019_build.md b/deploy/cpp_infer/docs/windows_vs2019_build.md
index 4f391d925..bcaefa46f 100644
--- a/deploy/cpp_infer/docs/windows_vs2019_build.md
+++ b/deploy/cpp_infer/docs/windows_vs2019_build.md
@@ -109,8 +109,10 @@ CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT
运行之前,将下面文件拷贝到`build/Release/`文件夹下
1. `paddle_inference/paddle/lib/paddle_inference.dll`
-2. `opencv/build/x64/vc15/bin/opencv_world455.dll`
-3. 如果使用openblas版本的预测库还需要拷贝 `paddle_inference/third_party/install/openblas/lib/openblas.dll`
+2. `paddle_inference/third_party/install/onnxruntime/lib/onnxruntime.dll`
+3. `paddle_inference/third_party/install/paddle2onnx/lib/paddle2onnx.dll`
+4. `opencv/build/x64/vc15/bin/opencv_world455.dll`
+5. 如果使用openblas版本的预测库还需要拷贝 `paddle_inference/third_party/install/openblas/lib/openblas.dll`
### Step4: 预测
diff --git a/deploy/slim/quantization/README_en.md b/deploy/slim/quantization/README_en.md
index 33b2c4784..c6796ae9d 100644
--- a/deploy/slim/quantization/README_en.md
+++ b/deploy/slim/quantization/README_en.md
@@ -73,4 +73,4 @@ python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_
The numerical range of the quantized model parameters derived from the above steps is still FP32, but the numerical range of the parameters is int8.
The derived model can be converted through the `opt tool` of PaddleLite.
-For quantitative model deployment, please refer to [Mobile terminal model deployment](../../lite/readme_en.md)
+For quantitative model deployment, please refer to [Mobile terminal model deployment](../../lite/readme.md)
diff --git a/ppocr/data/imaug/copy_paste.py b/ppocr/data/imaug/copy_paste.py
index 0b3386c89..79343da60 100644
--- a/ppocr/data/imaug/copy_paste.py
+++ b/ppocr/data/imaug/copy_paste.py
@@ -35,10 +35,12 @@ class CopyPaste(object):
point_num = data['polys'].shape[1]
src_img = data['image']
src_polys = data['polys'].tolist()
+ src_texts = data['texts']
src_ignores = data['ignore_tags'].tolist()
ext_data = data['ext_data'][0]
ext_image = ext_data['image']
ext_polys = ext_data['polys']
+ ext_texts = ext_data['texts']
ext_ignores = ext_data['ignore_tags']
indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]]
@@ -53,7 +55,7 @@ class CopyPaste(object):
src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB)
src_img = Image.fromarray(src_img).convert('RGBA')
- for poly, tag in zip(select_polys, select_ignores):
+ for idx, poly, tag in zip(select_idxs, select_polys, select_ignores):
box_img = get_rotate_crop_image(ext_image, poly)
src_img, box = self.paste_img(src_img, box_img, src_polys)
@@ -62,6 +64,7 @@ class CopyPaste(object):
for _ in range(len(box), point_num):
box.append(box[-1])
src_polys.append(box)
+ src_texts.append(ext_texts[idx])
src_ignores.append(tag)
src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR)
h, w = src_img.shape[:2]
@@ -70,6 +73,7 @@ class CopyPaste(object):
src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h)
data['image'] = src_img
data['polys'] = src_polys
+ data['texts'] = src_texts
data['ignore_tags'] = np.array(src_ignores)
return data
diff --git a/ppocr/metrics/rec_metric.py b/ppocr/metrics/rec_metric.py
index d858ae28e..986397811 100644
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import Levenshtein
+from rapidfuzz.distance import Levenshtein
import string
@@ -46,8 +46,7 @@ class RecMetric(object):
if self.is_filter:
pred = self._normalize_text(pred)
target = self._normalize_text(target)
- norm_edit_dis += Levenshtein.distance(pred, target) / max(
- len(pred), len(target), 1)
+ norm_edit_dis += Levenshtein.normalized_distance(pred, target)
if pred == target:
correct_num += 1
all_num += 1
diff --git a/ppstructure/kie/tools/eval_with_label_end2end.py b/ppstructure/kie/tools/eval_with_label_end2end.py
index b13ffb568..b0fd84363 100644
--- a/ppstructure/kie/tools/eval_with_label_end2end.py
+++ b/ppstructure/kie/tools/eval_with_label_end2end.py
@@ -20,7 +20,7 @@ from shapely.geometry import Polygon
import numpy as np
from collections import defaultdict
import operator
-import Levenshtein
+from rapidfuzz.distance import Levenshtein
import argparse
import json
import copy
diff --git a/ppstructure/table/table_metric/table_metric.py b/ppstructure/table/table_metric/table_metric.py
index 9aca98ad7..923a9c007 100755
--- a/ppstructure/table/table_metric/table_metric.py
+++ b/ppstructure/table/table_metric/table_metric.py
@@ -9,7 +9,7 @@
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Apache 2.0 License for more details.
-import distance
+from rapidfuzz.distance import Levenshtein
from apted import APTED, Config
from apted.helpers import Tree
from lxml import etree, html
@@ -39,17 +39,6 @@ class TableTree(Tree):
class CustomConfig(Config):
- @staticmethod
- def maximum(*sequences):
- """Get maximum possible value
- """
- return max(map(len, sequences))
-
- def normalized_distance(self, *sequences):
- """Get distance from 0 to 1
- """
- return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
def rename(self, node1, node2):
"""Compares attributes of trees"""
#print(node1.tag)
@@ -58,23 +47,12 @@ class CustomConfig(Config):
if node1.tag == 'td':
if node1.content or node2.content:
#print(node1.content, )
- return self.normalized_distance(node1.content, node2.content)
+ return Levenshtein.normalized_distance(node1.content, node2.content)
return 0.
class CustomConfig_del_short(Config):
- @staticmethod
- def maximum(*sequences):
- """Get maximum possible value
- """
- return max(map(len, sequences))
-
- def normalized_distance(self, *sequences):
- """Get distance from 0 to 1
- """
- return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
def rename(self, node1, node2):
"""Compares attributes of trees"""
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
@@ -90,21 +68,10 @@ class CustomConfig_del_short(Config):
node1_content = ['####']
if len(node2_content) < 3:
node2_content = ['####']
- return self.normalized_distance(node1_content, node2_content)
+ return Levenshtein.normalized_distance(node1_content, node2_content)
return 0.
class CustomConfig_del_block(Config):
- @staticmethod
- def maximum(*sequences):
- """Get maximum possible value
- """
- return max(map(len, sequences))
-
- def normalized_distance(self, *sequences):
- """Get distance from 0 to 1
- """
- return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
def rename(self, node1, node2):
"""Compares attributes of trees"""
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
@@ -120,7 +87,7 @@ class CustomConfig_del_block(Config):
while ' ' in node2_content:
print(node2_content.index(' '))
node2_content.pop(node2_content.index(' '))
- return self.normalized_distance(node1_content, node2_content)
+ return Levenshtein.normalized_distance(node1_content, node2_content)
return 0.
class TEDS(object):
diff --git a/requirements.txt b/requirements.txt
index b15176db3..976d29192 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ lmdb
tqdm
numpy
visualdl
-python-Levenshtein
+rapidfuzz
opencv-contrib-python==4.4.0.46
cython
lxml
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index 252ed1aaf..e0f2c41fa 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -120,11 +120,14 @@ def sorted_boxes(dt_boxes):
_boxes = list(sorted_boxes)
for i in range(num_boxes - 1):
- if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
- (_boxes[i + 1][0][0] < _boxes[i][0][0]):
- tmp = _boxes[i]
- _boxes[i] = _boxes[i + 1]
- _boxes[i + 1] = tmp
+ for j in range(i, 0, -1):
+ if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
+ (_boxes[j + 1][0][0] < _boxes[j][0][0]):
+ tmp = _boxes[j]
+ _boxes[j] = _boxes[j + 1]
+ _boxes[j + 1] = tmp
+ else:
+ break
return _boxes
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 1355ca62e..a547bbdba 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -549,7 +549,7 @@ def text_visual(texts,
def base64_to_cv2(b64str):
import base64
data = base64.b64decode(b64str.encode('utf8'))
- data = np.fromstring(data, np.uint8)
+ data = np.frombuffer(data, np.uint8)
data = cv2.imdecode(data, cv2.IMREAD_COLOR)
return data
diff --git a/tools/infer_kie.py b/tools/infer_kie.py
index 346e2e0ae..9375434cc 100755
--- a/tools/infer_kie.py
+++ b/tools/infer_kie.py
@@ -88,6 +88,29 @@ def draw_kie_result(batch, node, idx_to_cls, count):
cv2.imwrite(save_path, vis_img)
logger.info("The Kie Image saved in {}".format(save_path))
+def write_kie_result(fout, node, data):
+    """
+    Write the inference result to fout as one JSON line, with entries sorted by predicted label.
+    Each entry keeps the input annotation's transcription and points and adds the predicted label and score.
+    """
+    import json
+    label = data['label']
+    annotations = json.loads(label)
+    max_value, max_idx = paddle.max(node, -1), paddle.argmax(node, -1)
+    node_pred_label = max_idx.numpy().tolist()
+    node_pred_score = max_value.numpy().tolist()
+    res = []
+    for i, label in enumerate(node_pred_label):
+        pred_score = '{:.2f}'.format(node_pred_score[i])
+        pred_res = {
+            'label': label,
+            'transcription': annotations[i]['transcription'],
+            'score': pred_score,
+            'points': annotations[i]['points'],
+        }
+        res.append(pred_res)
+    res.sort(key=lambda x: x['label'])
+    fout.writelines([json.dumps(res, ensure_ascii=False) + '\n'])
def main():
global_config = config['Global']
@@ -114,7 +137,7 @@ def main():
warmup_times = 0
count_t = []
- with open(save_res_path, "wb") as fout:
+ with open(save_res_path, "w") as fout:
with open(config['Global']['infer_img'], "rb") as f:
lines = f.readlines()
for index, data_line in enumerate(lines):
@@ -139,6 +162,8 @@ def main():
node = F.softmax(node, -1)
count_t.append(time.time() - st)
draw_kie_result(batch, node, idx_to_cls, index)
+ write_kie_result(fout, node, data)
+ fout.close()
logger.info("success!")
logger.info("It took {} s for predict {} images.".format(
np.sum(count_t), len(count_t)))
From 988f6cadac214b1a25cfeaeeadf89547545cd2a0 Mon Sep 17 00:00:00 2001
From: littletomatodonkey
Date: Mon, 22 Aug 2022 09:52:23 +0800
Subject: [PATCH 06/13] fix kie doc (#7275)
* fix kie doc
* fix en
---
.../utils/dict/kie_dict/xfund_class_list.txt | 4 +++
ppstructure/docs/inference.md | 22 ++++++------
ppstructure/docs/inference_en.md | 21 ++++++-----
ppstructure/docs/installation.md | 15 +++-----
ppstructure/docs/installation_en.md | 14 +++-----
ppstructure/docs/models_list_en.md | 35 +++++++++++--------
ppstructure/docs/quickstart.md | 22 ++++++------
ppstructure/docs/quickstart_en.md | 24 ++++++-------
ppstructure/kie/README.md | 2 +-
9 files changed, 81 insertions(+), 78 deletions(-)
create mode 100644 ppocr/utils/dict/kie_dict/xfund_class_list.txt
diff --git a/ppocr/utils/dict/kie_dict/xfund_class_list.txt b/ppocr/utils/dict/kie_dict/xfund_class_list.txt
new file mode 100644
index 000000000..faded9f9b
--- /dev/null
+++ b/ppocr/utils/dict/kie_dict/xfund_class_list.txt
@@ -0,0 +1,4 @@
+OTHER
+QUESTION
+ANSWER
+HEADER
diff --git a/ppstructure/docs/inference.md b/ppstructure/docs/inference.md
index b05090076..3f92a6046 100644
--- a/ppstructure/docs/inference.md
+++ b/ppstructure/docs/inference.md
@@ -4,7 +4,7 @@
- [1.1 版面分析+表格识别](#1.1)
- [1.2 版面分析](#1.2)
- [1.3 表格识别](#1.3)
-- [2. DocVQA](#2)
+- [2. 关键信息抽取](#2)
## 1. Structure
@@ -61,20 +61,22 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
运行完成后,每张图片会在`output`字段指定的目录下的`structure`目录下有一个同名目录,表格会存储为一个excel,excel文件名为`[0,0,img_h,img_w]`。
-## 2. DocVQA
+## 2. 关键信息抽取
```bash
cd ppstructure
-# 下载模型
mkdir inference && cd inference
-# 下载SER xfun 模型并解压
-wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
+# 下载SER XFUND 模型并解压
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
cd ..
-
-python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
- --mode=kie \
- --image_dir=kie/images/input/zh_val_0.jpg \
- --vis_font_path=../doc/fonts/simfang.ttf
+python3 kie/predict_kie_token_ser.py \
+ --kie_algorithm=LayoutXLM \
+ --ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
+ --image_dir=./docs/kie/input/zh_val_42.jpg \
+ --ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
+ --vis_font_path=../doc/fonts/simfang.ttf \
+ --ocr_order_method="tb-yx"
```
+
运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下存放可视化之后的图片,图片名和输入图片名一致。
diff --git a/ppstructure/docs/inference_en.md b/ppstructure/docs/inference_en.md
index ad16f048e..126878378 100644
--- a/ppstructure/docs/inference_en.md
+++ b/ppstructure/docs/inference_en.md
@@ -4,7 +4,7 @@
- [1.1 layout analysis + table recognition](#1.1)
- [1.2 layout analysis](#1.2)
- [1.3 table recognition](#1.3)
-- [2. DocVQA](#2)
+- [2. KIE](#2)
## 1. Structure
@@ -63,19 +63,22 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
After the operation is completed, each image will have a directory with the same name in the `structure` directory under the directory specified by the `output` field. Each table in the image will be stored as an excel. The filename of excel is their coordinates in the image.
-## 2. DocVQA
+## 2. KIE
```bash
cd ppstructure
-# download model
mkdir inference && cd inference
-wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
+# download model
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
cd ..
-
-python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
- --mode=kie \
- --image_dir=kie/images/input/zh_val_0.jpg \
- --vis_font_path=../doc/fonts/simfang.ttf
+python3 kie/predict_kie_token_ser.py \
+ --kie_algorithm=LayoutXLM \
+ --ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
+ --image_dir=./docs/kie/input/zh_val_42.jpg \
+ --ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
+ --vis_font_path=../doc/fonts/simfang.ttf \
+ --ocr_order_method="tb-yx"
```
+
After the operation is completed, each image will store the visualized image in the `kie` directory under the directory specified by the `output` field, and the image name is the same as the input image name.
diff --git a/ppstructure/docs/installation.md b/ppstructure/docs/installation.md
index 3649e729d..063558023 100644
--- a/ppstructure/docs/installation.md
+++ b/ppstructure/docs/installation.md
@@ -1,7 +1,7 @@
- [快速安装](#快速安装)
- [1. PaddlePaddle 和 PaddleOCR](#1-paddlepaddle-和-paddleocr)
- [2. 安装其他依赖](#2-安装其他依赖)
- - [2.1 VQA所需依赖](#21--kie所需依赖)
+ - [2.1 KIE所需依赖](#21-kie所需依赖)
# 快速安装
@@ -11,16 +11,11 @@
## 2. 安装其他依赖
-### 2.1 VQA所需依赖
+### 2.1 KIE所需依赖
+
* paddleocr
```bash
-pip3 install paddleocr
-```
-
-* PaddleNLP
-```bash
-git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
-cd PaddleNLP
-pip3 install -e .
+pip install paddleocr -U
+pip install -r ./kie/requirements.txt
```
diff --git a/ppstructure/docs/installation_en.md b/ppstructure/docs/installation_en.md
index 02b02db0c..de8bb5f6f 100644
--- a/ppstructure/docs/installation_en.md
+++ b/ppstructure/docs/installation_en.md
@@ -2,7 +2,7 @@
- [1. PaddlePaddle 和 PaddleOCR](#1)
- [2. Install other dependencies](#2)
- - [2.1 VQA](#21)
+ - [2.1 KIE](#21)
@@ -14,17 +14,11 @@ Please refer to [PaddleOCR installation documentation](../../doc/doc_en/installa
## 2. Install other dependencies
-### 2.1 VQA
+### 2.1 KIE
* paddleocr
```bash
-pip3 install paddleocr
-```
-
-* PaddleNLP
-```bash
-git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
-cd PaddleNLP
-pip3 install -e .
+pip install paddleocr -U
+pip install -r ./kie/requirements.txt
```
diff --git a/ppstructure/docs/models_list_en.md b/ppstructure/docs/models_list_en.md
index 7ba1d3046..cb6857f62 100644
--- a/ppstructure/docs/models_list_en.md
+++ b/ppstructure/docs/models_list_en.md
@@ -4,8 +4,7 @@
- [2. OCR and Table Recognition](#2-ocr-and-table-recognition)
- [2.1 OCR](#21-ocr)
- [2.2 Table Recognition](#22-table-recognition)
-- [3. VQA](#3-kie)
-- [4. KIE](#4-kie)
+- [3. KIE](#3-kie)
@@ -40,19 +39,25 @@ If you need to use other OCR models, you can download the model in [PP-OCR model
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
-## 3. VQA
+## 3. KIE
-|model| description |inference model size|download|
-| --- |----------------------------------------------------------------| --- | --- |
-|ser_LayoutXLM_xfun_zh| SER model trained on xfun Chinese dataset based on LayoutXLM |1.4G|[inference model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar) |
-|re_LayoutXLM_xfun_zh| Re model trained on xfun Chinese dataset based on LayoutXLM |1.4G|[inference model coming soon]() / [trained model](https://paddleocr.bj.bcebos.com/pplayout/re_LayoutXLM_xfun_zh.tar) |
-|ser_LayoutLMv2_xfun_zh| SER model trained on xfun Chinese dataset based on LayoutXLMv2 |778M|[inference model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutLMv2_xfun_zh_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutLMv2_xfun_zh.tar) |
-|re_LayoutLMv2_xfun_zh| Re model trained on xfun Chinese dataset based on LayoutXLMv2 |765M|[inference model coming soon]() / [trained model](https://paddleocr.bj.bcebos.com/pplayout/re_LayoutLMv2_xfun_zh.tar) |
-|ser_LayoutLM_xfun_zh| SER model trained on xfun Chinese dataset based on LayoutLM |430M|[inference model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutLM_xfun_zh_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutLM_xfun_zh.tar) |
+On the XFUND_zh dataset, the accuracy and time cost of different models on a V100 GPU are as follows.
-
-## 4. KIE
+|Model|Backbone|Task|Config|Hmean|Time cost(ms)|Download link|
+| --- | --- | --- | --- | --- | --- |--- |
+|VI-LayoutXLM| VI-LayoutXLM-base | SER | [ser_vi_layoutxlm_xfund_zh_udml.yml](../../configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml)|**93.19%**| 15.49| [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_pretrained.tar)|
+|LayoutXLM| LayoutXLM-base | SER | [ser_layoutxlm_xfund_zh.yml](../../configs/kie/layoutlm_series/ser_layoutxlm_xfund_zh.yml)|90.38%| 19.49 |[trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar)|
+|LayoutLM| LayoutLM-base | SER | [ser_layoutlm_xfund_zh.yml](../../configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml)|77.31%|-|[trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutLM_xfun_zh.tar)|
+|LayoutLMv2| LayoutLMv2-base | SER | [ser_layoutlmv2_xfund_zh.yml](../../configs/kie/layoutlm_series/ser_layoutlmv2_xfund_zh.yml)|85.44%|31.46|[trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutLMv2_xfun_zh.tar)|
+|VI-LayoutXLM| VI-LayoutXLM-base | RE | [re_vi_layoutxlm_xfund_zh_udml.yml](../../configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml)|**83.92%**|15.49|[trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layoutxlm_xfund_pretrained.tar)|
+|LayoutXLM| LayoutXLM-base | RE | [re_layoutxlm_xfund_zh.yml](../../configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml)|74.83%|19.49|[trained model](https://paddleocr.bj.bcebos.com/pplayout/re_LayoutXLM_xfun_zh.tar)|
+|LayoutLMv2| LayoutLMv2-base | RE | [re_layoutlmv2_xfund_zh.yml](../../configs/kie/layoutlm_series/re_layoutlmv2_xfund_zh.yml)|67.77%|31.46|[trained model](https://paddleocr.bj.bcebos.com/pplayout/re_LayoutLMv2_xfun_zh.tar)|
-|model|description|model size|download|
-| --- | --- | --- | --- |
-|SDMGR|Key Information Extraction Model|78M|[inference model coming soon]() / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/kie_vgg16.tar)|
+* Note: The time cost above covers inference only, excluding pre-processing and post-processing. Test environment: `V100 GPU + CUDA 10.2 + CUDNN 8.1.1 + TRT 7.2.3.4`.
+
+
+On the wildreceipt dataset, the algorithm results are as follows:
+
+|Model|Backbone|Config|Hmean|Download link|
+| --- | --- | --- | --- | --- |
+|SDMGR|VGG16|[configs/kie/sdmgr/kie_unet_sdmgr.yml](../../configs/kie/sdmgr/kie_unet_sdmgr.yml)|86.7%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/kie_vgg16.tar)|
diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md
index 9a538a6f1..517703d7b 100644
--- a/ppstructure/docs/quickstart.md
+++ b/ppstructure/docs/quickstart.md
@@ -7,16 +7,16 @@
- [2.1.2 版面分析+表格识别](#212-版面分析表格识别)
- [2.1.3 版面分析](#213-版面分析)
- [2.1.4 表格识别](#214-表格识别)
- - [2.1.5 DocVQA](#215-dockie)
+ - [2.1.5 关键信息抽取](#215-关键信息抽取)
- [2.2 代码使用](#22-代码使用)
- [2.2.1 图像方向分类版面分析表格识别](#221-图像方向分类版面分析表格识别)
- [2.2.2 版面分析+表格识别](#222-版面分析表格识别)
- [2.2.3 版面分析](#223-版面分析)
- [2.2.4 表格识别](#224-表格识别)
- - [2.2.5 DocVQA](#225-dockie)
+ - [2.2.5 关键信息抽取](#225-关键信息抽取)
- [2.3 返回结果说明](#23-返回结果说明)
- [2.3.1 版面分析+表格识别](#231-版面分析表格识别)
- - [2.3.2 DocVQA](#232-dockie)
+ - [2.3.2 关键信息抽取](#232-关键信息抽取)
- [2.4 参数说明](#24-参数说明)
@@ -26,8 +26,8 @@
```bash
# 安装 paddleocr,推荐使用2.5+版本
pip3 install "paddleocr>=2.5"
-# 安装 DocVQA依赖包paddlenlp(如不需要DocVQA功能,可跳过)
-pip install paddlenlp
+# 安装 关键信息抽取 依赖包(如不需要KIE功能,可跳过)
+pip install -r kie/requirements.txt
```
@@ -62,9 +62,9 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/table.jpg --type=structur
```
-#### 2.1.5 DocVQA
+#### 2.1.5 关键信息抽取
-请参考:[文档视觉问答](../kie/README.md)。
+请参考:[关键信息抽取教程](../kie/README_ch.md)。
### 2.2 代码使用
@@ -170,9 +170,9 @@ for line in result:
```
-#### 2.2.5 DocVQA
+#### 2.2.5 关键信息抽取
-请参考:[文档视觉问答](../kie/README.md)。
+请参考:[关键信息抽取教程](../kie/README_ch.md)。
### 2.3 返回结果说明
@@ -208,9 +208,9 @@ dict 里各个字段说明如下
```
-#### 2.3.2 DocVQA
+#### 2.3.2 关键信息抽取
-请参考:[文档视觉问答](../kie/README.md)。
+请参考:[关键信息抽取教程](../kie/README_ch.md)。
### 2.4 参数说明
diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md
index cf9d12ff9..3a4e7a2d6 100644
--- a/ppstructure/docs/quickstart_en.md
+++ b/ppstructure/docs/quickstart_en.md
@@ -7,16 +7,16 @@
- [2.1.2 layout analysis + table recognition](#212-layout-analysis--table-recognition)
- [2.1.3 layout analysis](#213-layout-analysis)
- [2.1.4 table recognition](#214-table-recognition)
- - [2.1.5 DocVQA](#215-dockie)
+ - [2.1.5 Key Information Extraction](#215-key-information-extraction)
- [2.2 Use by code](#22-use-by-code)
- [2.2.1 image orientation + layout analysis + table recognition](#221-image-orientation--layout-analysis--table-recognition)
- [2.2.2 layout analysis + table recognition](#222-layout-analysis--table-recognition)
- [2.2.3 layout analysis](#223-layout-analysis)
- [2.2.4 table recognition](#224-table-recognition)
- - [2.2.5 DocVQA](#225-dockie)
+ - [2.2.5 Key Information Extraction](#225-key-information-extraction)
- [2.3 Result description](#23-result-description)
- [2.3.1 layout analysis + table recognition](#231-layout-analysis--table-recognition)
- - [2.3.2 DocVQA](#232-dockie)
+ - [2.3.2 Key Information Extraction](#232-key-information-extraction)
- [2.4 Parameter Description](#24-parameter-description)
@@ -26,8 +26,8 @@
```bash
# Install paddleocr, version 2.5+ is recommended
pip3 install "paddleocr>=2.5"
-# Install the DocVQA dependency package paddlenlp (if you do not use the DocVQA, you can skip it)
-pip install paddlenlp
+# Install the KIE dependency packages (if you do not use the KIE, you can skip it)
+pip install -r kie/requirements.txt
```
@@ -62,9 +62,9 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/table.jpg --type=structur
```
-#### 2.1.5 DocVQA
+#### 2.1.5 Key Information Extraction
-Please refer to: [Documentation Visual Q&A](../kie/README.md) .
+Please refer to: [Key Information Extraction](../kie/README.md) .
### 2.2 Use by code
@@ -120,7 +120,7 @@ for line in result:
from PIL import Image
-font_path = 'PaddleOCR/doc/fonts/simfang.ttf' # PaddleOCR下提供字体包
+font_path = 'PaddleOCR/doc/fonts/simfang.ttf' # font provided in PaddleOCR
image = Image.open(img_path).convert('RGB')
im_show = draw_structure_result(image, result,font_path=font_path)
im_show = Image.fromarray(im_show)
@@ -170,9 +170,9 @@ for line in result:
```
-#### 2.2.5 DocVQA
+#### 2.2.5 Key Information Extraction
-Please refer to: [Documentation Visual Q&A](../kie/README.md) .
+Please refer to: [Key Information Extraction](../kie/README.md) .
### 2.3 Result description
@@ -208,9 +208,9 @@ After the recognition is completed, each image will have a directory with the sa
```
-#### 2.3.2 DocVQA
+#### 2.3.2 Key Information Extraction
-Please refer to: [Documentation Visual Q&A](../kie/README.md) .
+Please refer to: [Key Information Extraction](../kie/README.md) .
### 2.4 Parameter Description
diff --git a/ppstructure/kie/README.md b/ppstructure/kie/README.md
index 9e1b72e77..adb19a3ca 100644
--- a/ppstructure/kie/README.md
+++ b/ppstructure/kie/README.md
@@ -246,7 +246,7 @@ For training, evaluation and inference tutorial for text recognition models, ple
If you want to finish the KIE tasks in your scene, and don't know what to prepare, please refer to [End cdoc](../../doc/doc_en/recognition.md).
-关于怎样在自己的场景中完成关键信息抽取任务,请参考:[Guide to End-to-end KIE](./how_to_do_kie_en.md)。
+To complete the key information extraction task in your own scenario from data preparation to model selection, please refer to: [Guide to End-to-end KIE](./how_to_do_kie_en.md).
## 5. Reference
From 66f4ae0261e42ca94f6ba19f73f76bd452873d98 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 22 Aug 2022 02:25:24 +0000
Subject: [PATCH 07/13] fix is_nlp_model not define error in save_model
---
ppocr/utils/save_load.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py
index 0c652c8fd..f86125521 100644
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@@ -194,6 +194,9 @@ def save_model(model,
_mkdir_if_not_exist(model_path, logger)
model_prefix = os.path.join(model_path, prefix)
paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')
+
+ is_nlp_model = config['Architecture']["model_type"] == 'kie' and config[
+ "Architecture"]["algorithm"] not in ["SDMGR"]
if is_nlp_model is not True:
paddle.save(model.state_dict(), model_prefix + '.pdparams')
metric_prefix = model_prefix
From 242f0adbcd8abb9f96544d8a2c370831212a56d8 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 22 Aug 2022 06:58:09 +0000
Subject: [PATCH 08/13] update inference_en.md
---
ppstructure/docs/inference.md | 37 +++++++++++++++++++-------------
ppstructure/docs/inference_en.md | 37 +++++++++++++++++++-------------
ppstructure/utility.py | 2 +-
3 files changed, 45 insertions(+), 31 deletions(-)
diff --git a/ppstructure/docs/inference.md b/ppstructure/docs/inference.md
index 3f92a6046..150471795 100644
--- a/ppstructure/docs/inference.md
+++ b/ppstructure/docs/inference.md
@@ -16,23 +16,26 @@ cd ppstructure
下载模型
```bash
mkdir inference && cd inference
-# 下载PP-OCRv2文本检测模型并解压
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-# 下载PP-OCRv2文本识别模型并解压
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
-# 下载超轻量级英文表格预测模型并解压
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+# 下载PP-Structurev2版面分析模型并解压
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
+# 下载PP-OCRv3文本检测模型并解压
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+# 下载PP-OCRv3文本识别模型并解压
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+# 下载PP-Structurev2表格识别模型并解压
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
```
### 1.1 版面分析+表格识别
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
- --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
- --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+ --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
+ --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
--image_dir=./docs/table/1.png \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
- --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+ --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
--output=../output \
--vis_font_path=../doc/fonts/simfang.ttf
```
@@ -41,19 +44,23 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
### 1.2 版面分析
```bash
-python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
+python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
+ --image_dir=./docs/table/1.png \
+ --output=../output \
+ --table=false \
+ --ocr=false
```
运行完成后,每张图片会在`output`字段指定的目录下的`structure`目录下有一个同名目录,图片区域会被裁剪之后保存下来,图片名为表格在图片里的坐标。版面分析结果会存储在`res.txt`文件中。
### 1.3 表格识别
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
- --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
- --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+ --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--image_dir=./docs/table/table.jpg \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
- --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+ --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
--output=../output \
--vis_font_path=../doc/fonts/simfang.ttf \
--layout=false
diff --git a/ppstructure/docs/inference_en.md b/ppstructure/docs/inference_en.md
index 126878378..1e35e62f8 100644
--- a/ppstructure/docs/inference_en.md
+++ b/ppstructure/docs/inference_en.md
@@ -18,23 +18,26 @@ download model
```bash
mkdir inference && cd inference
-# Download the PP-OCRv2 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-# Download the PP-OCRv2 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
-# Download the ultra-lightweight English table structure model and unzip it
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+# Download the PP-Structurev2 layout analysis model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
+# Download the PP-OCRv3 text detection model and unzip it
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+# Download the PP-OCRv3 text recognition model and unzip it
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+# Download the PP-Structurev2 form recognition model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
```
### 1.1 layout analysis + table recognition
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
- --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
- --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+ --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
+ --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
--image_dir=./docs/table/1.png \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
- --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+ --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
--output=../output \
--vis_font_path=../doc/fonts/simfang.ttf
```
@@ -43,19 +46,23 @@ After the operation is completed, each image will have a directory with the same
### 1.2 layout analysis
```bash
-python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
+python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
+ --image_dir=./docs/table/1.png \
+ --output=../output \
+ --table=false \
+ --ocr=false
```
After the operation is completed, each image will have a directory with the same name in the `structure` directory under the directory specified by the `output` field. Each picture in image will be cropped and saved. The filename of picture area is their coordinates in the image. Layout analysis results will be stored in the `res.txt` file
### 1.3 table recognition
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
- --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
- --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+ --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--image_dir=./docs/table/table.jpg \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
- --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+ --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
--output=../output \
--vis_font_path=../doc/fonts/simfang.ttf \
--layout=false
diff --git a/ppstructure/utility.py b/ppstructure/utility.py
index 270ee3aef..3bc275eba 100644
--- a/ppstructure/utility.py
+++ b/ppstructure/utility.py
@@ -38,7 +38,7 @@ def init_args():
parser.add_argument(
"--layout_dict_path",
type=str,
- default="../ppocr/utils/dict/layout_dict/layout_pubalynet_dict.txt")
+ default="../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt")
parser.add_argument(
"--layout_score_threshold",
type=float,
From cb6c4f81662e975647329a70c94fb60b4a5d8add Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 22 Aug 2022 07:11:30 +0000
Subject: [PATCH 09/13] update en rec model desc
---
ppstructure/docs/models_list.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ppstructure/docs/models_list.md b/ppstructure/docs/models_list.md
index 0b2f41deb..90c0bbe95 100644
--- a/ppstructure/docs/models_list.md
+++ b/ppstructure/docs/models_list.md
@@ -24,8 +24,8 @@
|模型名称|模型简介|推理模型大小|下载地址|
| --- | --- | --- | --- |
-|en_ppocr_mobile_v2.0_table_det|PubLayNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
-|en_ppocr_mobile_v2.0_table_rec|PubLayNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
+|en_ppocr_mobile_v2.0_table_det|PubTabNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
+|en_ppocr_mobile_v2.0_table_rec|PubTabNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
如需要使用其他OCR模型,可以在 [PP-OCR model_list](../../doc/doc_ch/models_list.md) 下载模型或者使用自己训练好的模型配置到 `det_model_dir`, `rec_model_dir`两个字段即可。
From 396eae907c22e4b357713098e1ae7393b0004cf3 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 22 Aug 2022 07:24:37 +0000
Subject: [PATCH 10/13] update SLANet ch model desc
---
ppstructure/README.md | 2 +-
ppstructure/README_ch.md | 2 +-
ppstructure/docs/models_list.md | 2 +-
ppstructure/docs/models_list_en.md | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/ppstructure/README.md b/ppstructure/README.md
index cff057e81..079db53fd 100644
--- a/ppstructure/README.md
+++ b/ppstructure/README.md
@@ -108,7 +108,7 @@ PP-Structure Series Model List (Updating)
| --- | --- | --- | --- |
|ch_PP-OCRv3_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
|ch_PP-OCRv3_rec_slim |[New] Slim qunatization with distillation lightweight model, supporting Chinese, English text recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
### 7.3 KIE model
diff --git a/ppstructure/README_ch.md b/ppstructure/README_ch.md
index efd25eb2c..f30a7345f 100644
--- a/ppstructure/README_ch.md
+++ b/ppstructure/README_ch.md
@@ -122,7 +122,7 @@ PP-Structure系列模型列表(更新中)
| --- | --- | --- | --- |
|ch_PP-OCRv3_det_slim|【最新】slim量化+蒸馏版超轻量模型,支持中英文、多语种文本检测| 1.1M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
|ch_PP-OCRv3_rec_slim |【最新】slim量化版超轻量模型,支持中英文、数字识别| 4.9M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
diff --git a/ppstructure/docs/models_list.md b/ppstructure/docs/models_list.md
index 90c0bbe95..ef7048faa 100644
--- a/ppstructure/docs/models_list.md
+++ b/ppstructure/docs/models_list.md
@@ -36,7 +36,7 @@
| --- | --- | --- | --- |
|en_ppocr_mobile_v2.0_table_structure|基于TableRec-RARE在PubTabNet数据集上训练的英文表格识别模型|6.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
|en_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的英文表格识别模型|9.2M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
diff --git a/ppstructure/docs/models_list_en.md b/ppstructure/docs/models_list_en.md
index cb6857f62..27b444d5a 100644
--- a/ppstructure/docs/models_list_en.md
+++ b/ppstructure/docs/models_list_en.md
@@ -36,7 +36,7 @@ If you need to use other OCR models, you can download the model in [PP-OCR model
| --- |-----------------------------------------------------------------------------| --- | --- |
|en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |6.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
|en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9.2M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
## 3. KIE
From f7b35bbbc418d4e4dcfedd3313eb514e11a60859 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 22 Aug 2022 08:00:49 +0000
Subject: [PATCH 11/13] do not print table result
---
paddleocr.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/paddleocr.py b/paddleocr.py
index d78046802..f6fb095af 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -636,4 +636,6 @@ def main():
for item in result:
item.pop('img')
+ item.pop('res')
logger.info(item)
+ logger.info('result save to {}'.format(args.output))
From 6eca179e966b20eeb6633b539f93b639ad50d86e Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Mon, 22 Aug 2022 08:20:20 +0000
Subject: [PATCH 12/13] update rec det model to fp32
---
ppstructure/README.md | 4 ++--
ppstructure/README_ch.md | 4 ++--
ppstructure/docs/inference.md | 12 ++++++------
ppstructure/docs/inference_en.md | 12 ++++++------
ppstructure/table/README.md | 8 ++++----
ppstructure/table/README_ch.md | 8 ++++----
6 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/ppstructure/README.md b/ppstructure/README.md
index 079db53fd..66df10b2e 100644
--- a/ppstructure/README.md
+++ b/ppstructure/README.md
@@ -106,8 +106,8 @@ PP-Structure Series Model List (Updating)
|model name|description|model size|download|
| --- | --- | --- | --- |
-|ch_PP-OCRv3_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
-|ch_PP-OCRv3_rec_slim |[New] Slim qunatization with distillation lightweight model, supporting Chinese, English text recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
+|ch_PP-OCRv3_det| [New] Lightweight model, supporting Chinese, English, multilingual text detection | 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
+|ch_PP-OCRv3_rec| [New] Lightweight model, supporting Chinese, English, multilingual text recognition | 12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
### 7.3 KIE model
diff --git a/ppstructure/README_ch.md b/ppstructure/README_ch.md
index f30a7345f..597cceafd 100644
--- a/ppstructure/README_ch.md
+++ b/ppstructure/README_ch.md
@@ -120,8 +120,8 @@ PP-Structure系列模型列表(更新中)
|模型名称|模型简介|模型大小|下载地址|
| --- | --- | --- | --- |
-|ch_PP-OCRv3_det_slim|【最新】slim量化+蒸馏版超轻量模型,支持中英文、多语种文本检测| 1.1M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
-|ch_PP-OCRv3_rec_slim |【最新】slim量化版超轻量模型,支持中英文、数字识别| 4.9M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
+|ch_PP-OCRv3_det| 【最新】超轻量模型,支持中英文、多语种文本检测 | 3.8M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
+|ch_PP-OCRv3_rec|【最新】超轻量模型,支持中英文、数字识别|12.4M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
diff --git a/ppstructure/docs/inference.md b/ppstructure/docs/inference.md
index 150471795..cf11960c1 100644
--- a/ppstructure/docs/inference.md
+++ b/ppstructure/docs/inference.md
@@ -19,9 +19,9 @@ mkdir inference && cd inference
# 下载PP-Structurev2版面分析模型并解压
wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
# 下载PP-OCRv3文本检测模型并解压
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
# 下载PP-OCRv3文本识别模型并解压
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
# 下载PP-Structurev2表格识别模型并解压
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
@@ -29,8 +29,8 @@ cd ..
### 1.1 版面分析+表格识别
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
- --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
--image_dir=./docs/table/1.png \
@@ -55,8 +55,8 @@ python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout
### 1.3 表格识别
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
- --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--image_dir=./docs/table/table.jpg \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
diff --git a/ppstructure/docs/inference_en.md b/ppstructure/docs/inference_en.md
index 1e35e62f8..357e26a11 100644
--- a/ppstructure/docs/inference_en.md
+++ b/ppstructure/docs/inference_en.md
@@ -21,9 +21,9 @@ mkdir inference && cd inference
# Download the PP-Structurev2 layout analysis model and unzip it
wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
# Download the PP-OCRv3 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
# Download the PP-OCRv3 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
# Download the PP-Structurev2 form recognition model and unzip it
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
@@ -31,8 +31,8 @@ cd ..
### 1.1 layout analysis + table recognition
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
- --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
--image_dir=./docs/table/1.png \
@@ -57,8 +57,8 @@ After the operation is completed, each image will have a directory with the same
### 1.3 table recognition
```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
- --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--image_dir=./docs/table/table.jpg \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md
index a5d0da3cc..e5c85eb96 100644
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
@@ -59,16 +59,16 @@ cd PaddleOCR/ppstructure
# download model
mkdir inference && cd inference
# Download the PP-OCRv3 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
# Download the PP-OCRv3 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
# Download the PP-Structurev2 form recognition model and unzip it
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
# run
python3.7 table/predict_table.py \
- --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
- --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+ --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md
index e83c81bef..086e39348 100644
--- a/ppstructure/table/README_ch.md
+++ b/ppstructure/table/README_ch.md
@@ -64,16 +64,16 @@ cd PaddleOCR/ppstructure
# 下载模型
mkdir inference && cd inference
# 下载PP-OCRv3文本检测模型并解压
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
# 下载PP-OCRv3文本识别模型并解压
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
# 下载PP-Structurev2表格识别模型并解压
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
# 执行表格识别
python table/predict_table.py \
- --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
- --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+ --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+ --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
From aedeb28a56c51fb9c007bc711385da11e76f3173 Mon Sep 17 00:00:00 2001
From: littletomatodonkey
Date: Mon, 22 Aug 2022 16:31:54 +0800
Subject: [PATCH 13/13] fix re bug (#7288)
---
tools/infer/utility.py | 29 +++++++++++++++--------------
tools/infer_kie_token_ser_re.py | 2 +-
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index a547bbdba..8d3e93992 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -225,23 +225,24 @@ def create_predictor(args, mode, logger):
min_subgraph_size, # skip the minmum trt subgraph
use_calib_mode=False)
- # collect shape
- if args.shape_info_filename is not None:
- if not os.path.exists(args.shape_info_filename):
- config.collect_shape_range_info(args.shape_info_filename)
- logger.info(
- f"collect dynamic shape info into : {args.shape_info_filename}"
- )
+ # collect shape
+ if args.shape_info_filename is not None:
+ if not os.path.exists(args.shape_info_filename):
+ config.collect_shape_range_info(
+ args.shape_info_filename)
+ logger.info(
+ f"collect dynamic shape info into : {args.shape_info_filename}"
+ )
+ else:
+ logger.info(
+ f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
+ )
+ config.enable_tuned_tensorrt_dynamic_shape(
+ args.shape_info_filename, True)
else:
logger.info(
- f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
+ f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
)
- config.enable_tuned_tensorrt_dynamic_shape(
- args.shape_info_filename, True)
- else:
- logger.info(
- f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
- )
elif args.use_xpu:
config.enable_xpu(10 * 1024 * 1024)
diff --git a/tools/infer_kie_token_ser_re.py b/tools/infer_kie_token_ser_re.py
index 40784e39b..3ee696f28 100755
--- a/tools/infer_kie_token_ser_re.py
+++ b/tools/infer_kie_token_ser_re.py
@@ -39,7 +39,7 @@ from ppocr.utils.visual import draw_re_results
from ppocr.utils.logging import get_logger
from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps, print_dict
from tools.program import ArgsParser, load_config, merge_config
-from tools.infer_vqa_token_ser import SerPredictor
+from tools.infer_kie_token_ser import SerPredictor
class ReArgsParser(ArgsParser):