update doc
commit
27215c6581
|
@ -47,7 +47,7 @@ str_to_cpu_mode(const std::string &cpu_mode) {
|
|||
std::string upper_key;
|
||||
std::transform(cpu_mode.cbegin(), cpu_mode.cend(), upper_key.begin(),
|
||||
::toupper);
|
||||
auto index = cpu_mode_map.find(upper_key);
|
||||
auto index = cpu_mode_map.find(upper_key.c_str());
|
||||
if (index == cpu_mode_map.end()) {
|
||||
LOGE("cpu_mode not found %s", upper_key.c_str());
|
||||
return paddle::lite_api::LITE_POWER_HIGH;
|
||||
|
|
|
@ -54,7 +54,7 @@ public class OCRPredictorNative {
|
|||
}
|
||||
|
||||
public void destory() {
|
||||
if (nativePointer > 0) {
|
||||
if (nativePointer != 0) {
|
||||
release(nativePointer);
|
||||
nativePointer = 0;
|
||||
}
|
||||
|
|
|
@ -109,8 +109,10 @@ CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT
|
|||
|
||||
运行之前,将下面文件拷贝到`build/Release/`文件夹下
|
||||
1. `paddle_inference/paddle/lib/paddle_inference.dll`
|
||||
2. `opencv/build/x64/vc15/bin/opencv_world455.dll`
|
||||
3. 如果使用openblas版本的预测库还需要拷贝 `paddle_inference/third_party/install/openblas/lib/openblas.dll`
|
||||
2. `paddle_inference/third_party/install/onnxruntime/lib/onnxruntime.dll`
|
||||
3. `paddle_inference/third_party/install/paddle2onnx/lib/paddle2onnx.dll`
|
||||
4. `opencv/build/x64/vc15/bin/opencv_world455.dll`
|
||||
5. 如果使用openblas版本的预测库还需要拷贝 `paddle_inference/third_party/install/openblas/lib/openblas.dll`
|
||||
|
||||
### Step4: 预测
|
||||
|
||||
|
|
|
@ -73,4 +73,4 @@ python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_
|
|||
The parameters of the quantized model derived from the above steps are still stored as FP32, but their numerical range is int8.
|
||||
The derived model can be converted through the `opt tool` of PaddleLite.
|
||||
|
||||
For quantitative model deployment, please refer to [Mobile terminal model deployment](../../lite/readme_en.md)
|
||||
For quantitative model deployment, please refer to [Mobile terminal model deployment](../../lite/readme.md)
|
||||
|
|
|
@ -636,4 +636,6 @@ def main():
|
|||
|
||||
for item in result:
|
||||
item.pop('img')
|
||||
item.pop('res')
|
||||
logger.info(item)
|
||||
logger.info('result save to {}'.format(args.output))
|
||||
|
|
|
@ -35,10 +35,12 @@ class CopyPaste(object):
|
|||
point_num = data['polys'].shape[1]
|
||||
src_img = data['image']
|
||||
src_polys = data['polys'].tolist()
|
||||
src_texts = data['texts']
|
||||
src_ignores = data['ignore_tags'].tolist()
|
||||
ext_data = data['ext_data'][0]
|
||||
ext_image = ext_data['image']
|
||||
ext_polys = ext_data['polys']
|
||||
ext_texts = ext_data['texts']
|
||||
ext_ignores = ext_data['ignore_tags']
|
||||
|
||||
indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]]
|
||||
|
@ -53,7 +55,7 @@ class CopyPaste(object):
|
|||
src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
|
||||
ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB)
|
||||
src_img = Image.fromarray(src_img).convert('RGBA')
|
||||
for poly, tag in zip(select_polys, select_ignores):
|
||||
for idx, poly, tag in zip(select_idxs, select_polys, select_ignores):
|
||||
box_img = get_rotate_crop_image(ext_image, poly)
|
||||
|
||||
src_img, box = self.paste_img(src_img, box_img, src_polys)
|
||||
|
@ -62,6 +64,7 @@ class CopyPaste(object):
|
|||
for _ in range(len(box), point_num):
|
||||
box.append(box[-1])
|
||||
src_polys.append(box)
|
||||
src_texts.append(ext_texts[idx])
|
||||
src_ignores.append(tag)
|
||||
src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR)
|
||||
h, w = src_img.shape[:2]
|
||||
|
@ -70,6 +73,7 @@ class CopyPaste(object):
|
|||
src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h)
|
||||
data['image'] = src_img
|
||||
data['polys'] = src_polys
|
||||
data['texts'] = src_texts
|
||||
data['ignore_tags'] = np.array(src_ignores)
|
||||
return data
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import Levenshtein
|
||||
from rapidfuzz.distance import Levenshtein
|
||||
import string
|
||||
|
||||
|
||||
|
@ -46,8 +46,7 @@ class RecMetric(object):
|
|||
if self.is_filter:
|
||||
pred = self._normalize_text(pred)
|
||||
target = self._normalize_text(target)
|
||||
norm_edit_dis += Levenshtein.distance(pred, target) / max(
|
||||
len(pred), len(target), 1)
|
||||
norm_edit_dis += Levenshtein.normalized_distance(pred, target)
|
||||
if pred == target:
|
||||
correct_num += 1
|
||||
all_num += 1
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
OTHER
|
||||
QUESTION
|
||||
ANSWER
|
||||
HEADER
|
|
@ -194,6 +194,9 @@ def save_model(model,
|
|||
_mkdir_if_not_exist(model_path, logger)
|
||||
model_prefix = os.path.join(model_path, prefix)
|
||||
paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')
|
||||
|
||||
is_nlp_model = config['Architecture']["model_type"] == 'kie' and config[
|
||||
"Architecture"]["algorithm"] not in ["SDMGR"]
|
||||
if is_nlp_model is not True:
|
||||
paddle.save(model.state_dict(), model_prefix + '.pdparams')
|
||||
metric_prefix = model_prefix
|
||||
|
|
|
@ -106,9 +106,9 @@ PP-Structure Series Model List (Updating)
|
|||
|
||||
|model name|description|model size|download|
|
||||
| --- | --- | --- | --- |
|
||||
|ch_PP-OCRv3_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
|
||||
|ch_PP-OCRv3_rec_slim |[New] Slim quantization with distillation lightweight model, supporting Chinese, English text recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|ch_PP-OCRv3_det| [New] Lightweight model, supporting Chinese, English, multilingual text detection | 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
|
||||
|ch_PP-OCRv3_rec| [New] Lightweight model, supporting Chinese, English, multilingual text recognition | 12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|
||||
### 7.3 KIE model
|
||||
|
||||
|
|
|
@ -120,9 +120,9 @@ PP-Structure系列模型列表(更新中)
|
|||
|
||||
|模型名称|模型简介|模型大小|下载地址|
|
||||
| --- | --- | --- | --- |
|
||||
|ch_PP-OCRv3_det_slim|【最新】slim量化+蒸馏版超轻量模型,支持中英文、多语种文本检测| 1.1M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
|
||||
|ch_PP-OCRv3_rec_slim |【最新】slim量化版超轻量模型,支持中英文、数字识别| 4.9M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|ch_PP-OCRv3_det| 【最新】超轻量模型,支持中英文、多语种文本检测 | 3.8M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
|
||||
|ch_PP-OCRv3_rec|【最新】超轻量模型,支持中英文、数字识别|12.4M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|
||||
|
||||
<a name="73"></a>
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
- [1.1 版面分析+表格识别](#1.1)
|
||||
- [1.2 版面分析](#1.2)
|
||||
- [1.3 表格识别](#1.3)
|
||||
- [2. DocVQA](#2)
|
||||
- [2. 关键信息抽取](#2)
|
||||
|
||||
<a name="1"></a>
|
||||
## 1. Structure
|
||||
|
@ -16,23 +16,26 @@ cd ppstructure
|
|||
下载模型
|
||||
```bash
|
||||
mkdir inference && cd inference
|
||||
# 下载PP-OCRv2文本检测模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
|
||||
# 下载PP-OCRv2文本识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
|
||||
# 下载超轻量级英文表格预测模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
|
||||
# 下载PP-Structurev2版面分析模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
|
||||
# 下载PP-OCRv3文本检测模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
|
||||
# 下载PP-OCRv3文本识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
|
||||
# 下载PP-Structurev2表格识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
cd ..
|
||||
```
|
||||
<a name="1.1"></a>
|
||||
### 1.1 版面分析+表格识别
|
||||
```bash
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
|
||||
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
|
||||
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
|
||||
--layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
|
||||
--image_dir=./docs/table/1.png \
|
||||
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
|
||||
--output=../output \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf
|
||||
```
|
||||
|
@ -41,19 +44,23 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
|
|||
<a name="1.2"></a>
|
||||
### 1.2 版面分析
|
||||
```bash
|
||||
python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
|
||||
python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
|
||||
--image_dir=./docs/table/1.png \
|
||||
--output=../output \
|
||||
--table=false \
|
||||
--ocr=false
|
||||
```
|
||||
运行完成后,每张图片会在`output`字段指定的目录下的`structure`目录下有一个同名目录,图片区域会被裁剪之后保存下来,图片名为表格在图片里的坐标。版面分析结果会存储在`res.txt`文件中。
|
||||
|
||||
<a name="1.3"></a>
|
||||
### 1.3 表格识别
|
||||
```bash
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
|
||||
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
|
||||
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
|
||||
--image_dir=./docs/table/table.jpg \
|
||||
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
|
||||
--output=../output \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf \
|
||||
--layout=false
|
||||
|
@ -61,20 +68,22 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
|
|||
运行完成后,每张图片会在`output`字段指定的目录下的`structure`目录下有一个同名目录,表格会存储为一个excel,excel文件名为`[0,0,img_h,img_w]`。
|
||||
|
||||
<a name="2"></a>
|
||||
## 2. DocVQA
|
||||
## 2. 关键信息抽取
|
||||
|
||||
```bash
|
||||
cd ppstructure
|
||||
|
||||
# 下载模型
|
||||
mkdir inference && cd inference
|
||||
# 下载SER xfun 模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
|
||||
# 下载SER XFUND 模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
|
||||
cd ..
|
||||
|
||||
python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
|
||||
--mode=kie \
|
||||
--image_dir=kie/images/input/zh_val_0.jpg \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf
|
||||
python3 kie/predict_kie_token_ser.py \
|
||||
--kie_algorithm=LayoutXLM \
|
||||
--ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
|
||||
--image_dir=./docs/kie/input/zh_val_42.jpg \
|
||||
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf \
|
||||
--ocr_order_method="tb-yx"
|
||||
```
|
||||
|
||||
运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下存放可视化之后的图片,图片名和输入图片名一致。
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
- [1.1 layout analysis + table recognition](#1.1)
|
||||
- [1.2 layout analysis](#1.2)
|
||||
- [1.3 table recognition](#1.3)
|
||||
- [2. DocVQA](#2)
|
||||
- [2. KIE](#2)
|
||||
|
||||
<a name="1"></a>
|
||||
## 1. Structure
|
||||
|
@ -18,23 +18,26 @@ download model
|
|||
|
||||
```bash
|
||||
mkdir inference && cd inference
|
||||
# Download the PP-OCRv2 text detection model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
|
||||
# Download the PP-OCRv2 text recognition model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
|
||||
# Download the ultra-lightweight English table structure model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
|
||||
# Download the PP-Structurev2 layout analysis model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
|
||||
# Download the PP-OCRv3 text detection model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
|
||||
# Download the PP-OCRv3 text recognition model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
|
||||
# Download the PP-Structurev2 table recognition model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
cd ..
|
||||
```
|
||||
<a name="1.1"></a>
|
||||
### 1.1 layout analysis + table recognition
|
||||
```bash
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
|
||||
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
|
||||
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
|
||||
--layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
|
||||
--image_dir=./docs/table/1.png \
|
||||
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
|
||||
--output=../output \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf
|
||||
```
|
||||
|
@ -43,19 +46,23 @@ After the operation is completed, each image will have a directory with the same
|
|||
<a name="1.2"></a>
|
||||
### 1.2 layout analysis
|
||||
```bash
|
||||
python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
|
||||
python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
|
||||
--image_dir=./docs/table/1.png \
|
||||
--output=../output \
|
||||
--table=false \
|
||||
--ocr=false
|
||||
```
|
||||
After the operation is completed, each image will have a directory with the same name in the `structure` directory under the directory specified by the `output` field. Each picture region in the image will be cropped and saved, and the filename of each cropped region is its coordinates in the image. Layout analysis results will be stored in the `res.txt` file.
|
||||
|
||||
<a name="1.3"></a>
|
||||
### 1.3 table recognition
|
||||
```bash
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
|
||||
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
|
||||
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
|
||||
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
|
||||
--image_dir=./docs/table/table.jpg \
|
||||
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
|
||||
--output=../output \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf \
|
||||
--layout=false
|
||||
|
@ -63,19 +70,22 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
|
|||
After the operation is completed, each image will have a directory with the same name in the `structure` directory under the directory specified by the `output` field. Each table in the image will be stored as an Excel file, and the filename of each Excel file is the table's coordinates in the image.
|
||||
|
||||
<a name="2"></a>
|
||||
## 2. DocVQA
|
||||
## 2. KIE
|
||||
|
||||
```bash
|
||||
cd ppstructure
|
||||
|
||||
# download model
|
||||
mkdir inference && cd inference
|
||||
wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
|
||||
# Download the SER XFUND model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
|
||||
cd ..
|
||||
|
||||
python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
|
||||
--mode=kie \
|
||||
--image_dir=kie/images/input/zh_val_0.jpg \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf
|
||||
python3 kie/predict_kie_token_ser.py \
|
||||
--kie_algorithm=LayoutXLM \
|
||||
--ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
|
||||
--image_dir=./docs/kie/input/zh_val_42.jpg \
|
||||
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
|
||||
--vis_font_path=../doc/fonts/simfang.ttf \
|
||||
--ocr_order_method="tb-yx"
|
||||
```
|
||||
|
||||
After the operation is completed, the visualized result of each image will be stored in the `kie` directory under the directory specified by the `output` field, and the image name is the same as the input image name.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
- [快速安装](#快速安装)
|
||||
- [1. PaddlePaddle 和 PaddleOCR](#1-paddlepaddle-和-paddleocr)
|
||||
- [2. 安装其他依赖](#2-安装其他依赖)
|
||||
- [2.1 VQA所需依赖](#21--kie所需依赖)
|
||||
- [2.1 KIE所需依赖](#21-kie所需依赖)
|
||||
|
||||
# 快速安装
|
||||
|
||||
|
@ -11,16 +11,11 @@
|
|||
|
||||
## 2. 安装其他依赖
|
||||
|
||||
### 2.1 VQA所需依赖
|
||||
### 2.1 KIE所需依赖
|
||||
|
||||
* paddleocr
|
||||
|
||||
```bash
|
||||
pip3 install paddleocr
|
||||
```
|
||||
|
||||
* PaddleNLP
|
||||
```bash
|
||||
git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
|
||||
cd PaddleNLP
|
||||
pip3 install -e .
|
||||
pip install paddleocr -U
|
||||
pip install -r ./kie/requirements.txt
|
||||
```
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
- [1. PaddlePaddle and PaddleOCR](#1)
|
||||
- [2. Install other dependencies](#2)
|
||||
- [2.1 VQA](#21)
|
||||
- [2.1 KIE](#21)
|
||||
|
||||
|
||||
<a name="1"></a>
|
||||
|
@ -14,17 +14,11 @@ Please refer to [PaddleOCR installation documentation](../../doc/doc_en/installa
|
|||
## 2. Install other dependencies
|
||||
|
||||
<a name="21"></a>
|
||||
### 2.1 VQA
|
||||
### 2.1 KIE
|
||||
|
||||
* paddleocr
|
||||
|
||||
```bash
|
||||
pip3 install paddleocr
|
||||
```
|
||||
|
||||
* PaddleNLP
|
||||
```bash
|
||||
git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
|
||||
cd PaddleNLP
|
||||
pip3 install -e .
|
||||
pip install paddleocr -U
|
||||
pip install -r ./kie/requirements.txt
|
||||
```
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
|
||||
|模型名称|模型简介|推理模型大小|下载地址|
|
||||
| --- | --- | --- | --- |
|
||||
|en_ppocr_mobile_v2.0_table_det|PubLayNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
|
||||
|en_ppocr_mobile_v2.0_table_rec|PubLayNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
|
||||
|en_ppocr_mobile_v2.0_table_det|PubTabNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
|
||||
|en_ppocr_mobile_v2.0_table_rec|PubTabNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
|
||||
|
||||
如需要使用其他OCR模型,可以在 [PP-OCR model_list](../../doc/doc_ch/models_list.md) 下载模型或者使用自己训练好的模型配置到 `det_model_dir`, `rec_model_dir`两个字段即可。
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
|||
| --- | --- | --- | --- |
|
||||
|en_ppocr_mobile_v2.0_table_structure|基于TableRec-RARE在PubTabNet数据集上训练的英文表格识别模型|6.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
|
||||
|en_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的英文表格识别模型|9.2M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|
||||
<a name="3"></a>
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ If you need to use other OCR models, you can download the model in [PP-OCR model
|
|||
| --- |-----------------------------------------------------------------------------| --- | --- |
|
||||
|en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |6.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
|
||||
|en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9.2M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|
||||
|
||||
<a name="3"></a>
|
||||
## 3. KIE
|
||||
|
|
|
@ -7,18 +7,22 @@
|
|||
- [2.1.2 版面分析+表格识别](#212-版面分析表格识别)
|
||||
- [2.1.3 版面分析](#213-版面分析)
|
||||
- [2.1.4 表格识别](#214-表格识别)
|
||||
- [2.1.5 DocVQA](#215-dockie)
|
||||
- [2.1.5 关键信息抽取](#215-关键信息抽取)
|
||||
- [2.1.6 版面恢复](#216-版面恢复)
|
||||
- [2.2 代码使用](#22-代码使用)
|
||||
- [2.2.1 图像方向分类版面分析表格识别](#221-图像方向分类版面分析表格识别)
|
||||
|
||||
- [2.2.1 图像方向分类+版面分析+表格识别](#221-图像方向分类版面分析表格识别)
|
||||
- [2.2.2 版面分析+表格识别](#222-版面分析表格识别)
|
||||
- [2.2.3 版面分析](#223-版面分析)
|
||||
- [2.2.4 表格识别](#224-表格识别)
|
||||
- [2.2.5 DocVQA](#225-dockie)
|
||||
|
||||
- [2.2.5 关键信息抽取](#225-关键信息抽取)
|
||||
- [2.2.6 版面恢复](#226-版面恢复)
|
||||
|
||||
- [2.3 返回结果说明](#23-返回结果说明)
|
||||
- [2.3.1 版面分+表格识别](#231-版面分析表格识别)
|
||||
- [2.3.2 DocVQA](#232-dockie)
|
||||
- [2.3.1 版面分析+表格识别](#231-版面分析表格识别)
|
||||
- [2.3.2 关键信息抽取](#232-关键信息抽取)
|
||||
|
||||
- [2.4 参数说明](#24-参数说明)
|
||||
|
||||
|
||||
|
@ -28,8 +32,8 @@
|
|||
```bash
|
||||
# 安装 paddleocr,推荐使用2.6版本
|
||||
pip3 install "paddleocr>=2.6"
|
||||
# 安装 DocVQA依赖包paddlenlp(如不需要DocVQA功能,可跳过)
|
||||
pip3 install paddlenlp
|
||||
# 安装 关键信息抽取 依赖包(如不需要KIE功能,可跳过)
|
||||
pip install -r kie/requirements.txt
|
||||
# 安装 图像方向分类依赖包paddleclas(如不需要图像方向分类功能,可跳过)
|
||||
pip3 install paddleclas
|
||||
```
|
||||
|
@ -66,9 +70,8 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/table.jpg --type=structur
|
|||
|
||||
<a name="215"></a>
|
||||
|
||||
#### 2.1.5 DocVQA
|
||||
|
||||
请参考:[文档视觉问答](../kie/README.md)。
|
||||
#### 2.1.5 关键信息抽取
|
||||
请参考:[关键信息抽取教程](../kie/README_ch.md)。
|
||||
|
||||
<a name="216"></a>
|
||||
|
||||
|
@ -184,9 +187,9 @@ for line in result:
|
|||
```
|
||||
|
||||
<a name="225"></a>
|
||||
#### 2.2.5 DocVQA
|
||||
#### 2.2.5 关键信息抽取
|
||||
|
||||
请参考:[文档视觉问答](../kie/README.md)。
|
||||
请参考:[关键信息抽取教程](../kie/README_ch.md)。
|
||||
|
||||
<a name="226"></a>
|
||||
|
||||
|
@ -249,9 +252,9 @@ dict 里各个字段说明如下
|
|||
```
|
||||
|
||||
<a name="232"></a>
|
||||
#### 2.3.2 DocVQA
|
||||
#### 2.3.2 关键信息抽取
|
||||
|
||||
请参考:[文档视觉问答](../kie/README.md)。
|
||||
请参考:[关键信息抽取教程](../kie/README_ch.md)。
|
||||
|
||||
<a name="24"></a>
|
||||
### 2.4 参数说明
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
- [2.1.2 layout analysis + table recognition](#212-layout-analysis--table-recognition)
|
||||
- [2.1.3 layout analysis](#213-layout-analysis)
|
||||
- [2.1.4 table recognition](#214-table-recognition)
|
||||
- [2.1.5 DocVQA](#215-dockie)
|
||||
- [2.1.5 Key Information Extraction](#215-key-information-extraction)
|
||||
- [2.1.6 layout recovery](#216-layout-recovery)
|
||||
- [2.2 Use by code](#22-use-by-code)
|
||||
- [2.2.1 image orientation + layout analysis + table recognition](#221-image-orientation--layout-analysis--table-recognition)
|
||||
|
@ -15,10 +15,11 @@
|
|||
- [2.2.3 layout analysis](#223-layout-analysis)
|
||||
- [2.2.4 table recognition](#224-table-recognition)
|
||||
- [2.2.5 DocVQA](#225-dockie)
|
||||
- [2.2.5 Key Information Extraction](#225-key-information-extraction)
|
||||
- [2.2.6 layout recovery](#226-layout-recovery)
|
||||
- [2.3 Result description](#23-result-description)
|
||||
- [2.3.1 layout analysis + table recognition](#231-layout-analysis--table-recognition)
|
||||
- [2.3.2 DocVQA](#232-dockie)
|
||||
- [2.3.2 Key Information Extraction](#232-key-information-extraction)
|
||||
- [2.4 Parameter Description](#24-parameter-description)
|
||||
|
||||
|
||||
|
@ -28,14 +29,14 @@
|
|||
```bash
|
||||
# Install paddleocr, version 2.6 is recommended
|
||||
pip3 install "paddleocr>=2.6"
|
||||
# Install the DocVQA dependency package paddlenlp (if you do not use the DocVQA, you can skip it)
|
||||
pip3 install paddlenlp
|
||||
# Install the KIE dependency packages (if you do not use the KIE, you can skip it)
|
||||
pip install -r kie/requirements.txt
|
||||
# Install the image direction classification dependency package paddleclas (if you do not use the image direction classification, you can skip it)
|
||||
pip3 install paddleclas
|
||||
|
||||
```
|
||||
|
||||
<a name="2"></a>
|
||||
|
||||
## 2. Use
|
||||
|
||||
<a name="21"></a>
|
||||
|
@ -66,9 +67,9 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/table.jpg --type=structur
|
|||
```
|
||||
|
||||
<a name="215"></a>
|
||||
#### 2.1.5 DocVQA
|
||||
#### 2.1.5 Key Information Extraction
|
||||
|
||||
Please refer to: [Documentation Visual Q&A](../kie/README.md) .
|
||||
Please refer to: [Key Information Extraction](../kie/README.md) .
|
||||
|
||||
<a name="216"></a>
|
||||
#### 2.1.6 layout recovery
|
||||
|
@ -130,7 +131,7 @@ for line in result:
|
|||
|
||||
from PIL import Image
|
||||
|
||||
font_path = 'PaddleOCR/doc/fonts/simfang.ttf' # PaddleOCR下提供字体包
|
||||
font_path = 'PaddleOCR/doc/fonts/simfang.ttf' # font provided in PaddleOCR
|
||||
image = Image.open(img_path).convert('RGB')
|
||||
im_show = draw_structure_result(image, result,font_path=font_path)
|
||||
im_show = Image.fromarray(im_show)
|
||||
|
@ -180,9 +181,9 @@ for line in result:
|
|||
```
|
||||
|
||||
<a name="225"></a>
|
||||
#### 2.2.5 DocVQA
|
||||
#### 2.2.5 Key Information Extraction
|
||||
|
||||
Please refer to: [Documentation Visual Q&A](../kie/README.md) .
|
||||
Please refer to: [Key Information Extraction](../kie/README.md) .
|
||||
|
||||
<a name="226"></a>
|
||||
#### 2.2.6 layout recovery
|
||||
|
@ -244,9 +245,9 @@ After the recognition is completed, each image will have a directory with the sa
|
|||
```
|
||||
|
||||
<a name="232"></a>
|
||||
#### 2.3.2 DocVQA
|
||||
#### 2.3.2 Key Information Extraction
|
||||
|
||||
Please refer to: [Documentation Visual Q&A](../kie/README.md) .
|
||||
Please refer to: [Key Information Extraction](../kie/README.md) .
|
||||
|
||||
<a name="24"></a>
|
||||
### 2.4 Parameter Description
|
||||
|
|
|
@ -246,7 +246,7 @@ For training, evaluation and inference tutorial for text recognition models, ple
|
|||
|
||||
If you want to finish the KIE tasks in your scene, and don't know what to prepare, please refer to [End cdoc](../../doc/doc_en/recognition.md).
|
||||
|
||||
关于怎样在自己的场景中完成关键信息抽取任务,请参考:[Guide to End-to-end KIE](./how_to_do_kie_en.md)。
|
||||
To complete the key information extraction task in your own scenario from data preparation to model selection, please refer to: [Guide to End-to-end KIE](./how_to_do_kie_en.md).
|
||||
|
||||
|
||||
## 5. Reference
|
||||
|
|
|
@ -20,7 +20,7 @@ from shapely.geometry import Polygon
|
|||
import numpy as np
|
||||
from collections import defaultdict
|
||||
import operator
|
||||
import Levenshtein
|
||||
from rapidfuzz.distance import Levenshtein
|
||||
import argparse
|
||||
import json
|
||||
import copy
|
||||
|
|
|
@ -59,16 +59,16 @@ cd PaddleOCR/ppstructure
|
|||
# download model
|
||||
mkdir inference && cd inference
|
||||
# Download the PP-OCRv3 text detection model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
|
||||
# Download the PP-OCRv3 text recognition model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
|
||||
# Download the PP-Structurev2 table recognition model and unzip it
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
cd ..
|
||||
# run
|
||||
python3.7 table/predict_table.py \
|
||||
--det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
|
||||
--det_model_dir=inference/ch_PP-OCRv3_det_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
|
||||
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
|
||||
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
|
||||
|
|
|
@ -64,16 +64,16 @@ cd PaddleOCR/ppstructure
|
|||
# 下载模型
|
||||
mkdir inference && cd inference
|
||||
# 下载PP-OCRv3文本检测模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
|
||||
# 下载PP-OCRv3文本识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
|
||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
|
||||
# 下载PP-Structurev2表格识别模型并解压
|
||||
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||
cd ..
|
||||
# 执行表格识别
|
||||
python table/predict_table.py \
|
||||
--det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
|
||||
--det_model_dir=inference/ch_PP-OCRv3_det_infer \
|
||||
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
|
||||
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
|
||||
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
|
||||
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Apache 2.0 License for more details.
|
||||
|
||||
import distance
|
||||
from rapidfuzz.distance import Levenshtein
|
||||
from apted import APTED, Config
|
||||
from apted.helpers import Tree
|
||||
from lxml import etree, html
|
||||
|
@ -39,17 +39,6 @@ class TableTree(Tree):
|
|||
|
||||
|
||||
class CustomConfig(Config):
|
||||
@staticmethod
|
||||
def maximum(*sequences):
|
||||
"""Get maximum possible value
|
||||
"""
|
||||
return max(map(len, sequences))
|
||||
|
||||
def normalized_distance(self, *sequences):
|
||||
"""Get distance from 0 to 1
|
||||
"""
|
||||
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
|
||||
|
||||
def rename(self, node1, node2):
|
||||
"""Compares attributes of trees"""
|
||||
#print(node1.tag)
|
||||
|
@ -58,23 +47,12 @@ class CustomConfig(Config):
|
|||
if node1.tag == 'td':
|
||||
if node1.content or node2.content:
|
||||
#print(node1.content, )
|
||||
return self.normalized_distance(node1.content, node2.content)
|
||||
return Levenshtein.normalized_distance(node1.content, node2.content)
|
||||
return 0.
|
||||
|
||||
|
||||
|
||||
class CustomConfig_del_short(Config):
|
||||
@staticmethod
|
||||
def maximum(*sequences):
|
||||
"""Get maximum possible value
|
||||
"""
|
||||
return max(map(len, sequences))
|
||||
|
||||
def normalized_distance(self, *sequences):
|
||||
"""Get distance from 0 to 1
|
||||
"""
|
||||
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
|
||||
|
||||
def rename(self, node1, node2):
|
||||
"""Compares attributes of trees"""
|
||||
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
|
||||
|
@ -90,21 +68,10 @@ class CustomConfig_del_short(Config):
|
|||
node1_content = ['####']
|
||||
if len(node2_content) < 3:
|
||||
node2_content = ['####']
|
||||
return self.normalized_distance(node1_content, node2_content)
|
||||
return Levenshtein.normalized_distance(node1_content, node2_content)
|
||||
return 0.
|
||||
|
||||
class CustomConfig_del_block(Config):
|
||||
@staticmethod
|
||||
def maximum(*sequences):
|
||||
"""Get maximum possible value
|
||||
"""
|
||||
return max(map(len, sequences))
|
||||
|
||||
def normalized_distance(self, *sequences):
|
||||
"""Get distance from 0 to 1
|
||||
"""
|
||||
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
|
||||
|
||||
def rename(self, node1, node2):
|
||||
"""Compares attributes of trees"""
|
||||
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
|
||||
|
@ -120,7 +87,7 @@ class CustomConfig_del_block(Config):
|
|||
while ' ' in node2_content:
|
||||
print(node2_content.index(' '))
|
||||
node2_content.pop(node2_content.index(' '))
|
||||
return self.normalized_distance(node1_content, node2_content)
|
||||
return Levenshtein.normalized_distance(node1_content, node2_content)
|
||||
return 0.
|
||||
|
||||
class TEDS(object):
|
||||
|
|
|
@ -38,7 +38,7 @@ def init_args():
|
|||
parser.add_argument(
|
||||
"--layout_dict_path",
|
||||
type=str,
|
||||
default="../ppocr/utils/dict/layout_dict/layout_pubalynet_dict.txt")
|
||||
default="../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt")
|
||||
parser.add_argument(
|
||||
"--layout_score_threshold",
|
||||
type=float,
|
||||
|
|
|
@ -6,7 +6,7 @@ lmdb
|
|||
tqdm
|
||||
numpy
|
||||
visualdl
|
||||
python-Levenshtein
|
||||
rapidfuzz
|
||||
opencv-contrib-python==4.4.0.46
|
||||
cython
|
||||
lxml
|
||||
|
|
|
@ -120,11 +120,14 @@ def sorted_boxes(dt_boxes):
|
|||
_boxes = list(sorted_boxes)
|
||||
|
||||
for i in range(num_boxes - 1):
|
||||
if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
|
||||
(_boxes[i + 1][0][0] < _boxes[i][0][0]):
|
||||
tmp = _boxes[i]
|
||||
_boxes[i] = _boxes[i + 1]
|
||||
_boxes[i + 1] = tmp
|
||||
for j in range(i, 0, -1):
|
||||
if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
|
||||
(_boxes[j + 1][0][0] < _boxes[j][0][0]):
|
||||
tmp = _boxes[j]
|
||||
_boxes[j] = _boxes[j + 1]
|
||||
_boxes[j + 1] = tmp
|
||||
else:
|
||||
break
|
||||
return _boxes
|
||||
|
||||
|
||||
|
|
|
@ -225,23 +225,24 @@ def create_predictor(args, mode, logger):
|
|||
min_subgraph_size, # skip the minmum trt subgraph
|
||||
use_calib_mode=False)
|
||||
|
||||
# collect shape
|
||||
if args.shape_info_filename is not None:
|
||||
if not os.path.exists(args.shape_info_filename):
|
||||
config.collect_shape_range_info(args.shape_info_filename)
|
||||
logger.info(
|
||||
f"collect dynamic shape info into : {args.shape_info_filename}"
|
||||
)
|
||||
# collect shape
|
||||
if args.shape_info_filename is not None:
|
||||
if not os.path.exists(args.shape_info_filename):
|
||||
config.collect_shape_range_info(
|
||||
args.shape_info_filename)
|
||||
logger.info(
|
||||
f"collect dynamic shape info into : {args.shape_info_filename}"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
|
||||
)
|
||||
config.enable_tuned_tensorrt_dynamic_shape(
|
||||
args.shape_info_filename, True)
|
||||
else:
|
||||
logger.info(
|
||||
f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
|
||||
f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
|
||||
)
|
||||
config.enable_tuned_tensorrt_dynamic_shape(
|
||||
args.shape_info_filename, True)
|
||||
else:
|
||||
logger.info(
|
||||
f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
|
||||
)
|
||||
|
||||
elif args.use_xpu:
|
||||
config.enable_xpu(10 * 1024 * 1024)
|
||||
|
@ -549,7 +550,7 @@ def text_visual(texts,
|
|||
def base64_to_cv2(b64str):
|
||||
import base64
|
||||
data = base64.b64decode(b64str.encode('utf8'))
|
||||
data = np.fromstring(data, np.uint8)
|
||||
data = np.frombuffer(data, np.uint8)
|
||||
data = cv2.imdecode(data, cv2.IMREAD_COLOR)
|
||||
return data
|
||||
|
||||
|
|
|
@ -88,6 +88,29 @@ def draw_kie_result(batch, node, idx_to_cls, count):
|
|||
cv2.imwrite(save_path, vis_img)
|
||||
logger.info("The Kie Image saved in {}".format(save_path))
|
||||
|
||||
def write_kie_result(fout, node, data):
|
||||
"""
|
||||
Write infer result to output file, sorted by the predict label of each line.
|
||||
The format keeps the same as the input with additional score attribute.
|
||||
"""
|
||||
import json
|
||||
label = data['label']
|
||||
annotations = json.loads(label)
|
||||
max_value, max_idx = paddle.max(node, -1), paddle.argmax(node, -1)
|
||||
node_pred_label = max_idx.numpy().tolist()
|
||||
node_pred_score = max_value.numpy().tolist()
|
||||
res = []
|
||||
for i, label in enumerate(node_pred_label):
|
||||
pred_score = '{:.2f}'.format(node_pred_score[i])
|
||||
pred_res = {
|
||||
'label': label,
|
||||
'transcription': annotations[i]['transcription'],
|
||||
'score': pred_score,
|
||||
'points': annotations[i]['points'],
|
||||
}
|
||||
res.append(pred_res)
|
||||
res.sort(key=lambda x: x['label'])
|
||||
fout.writelines([json.dumps(res, ensure_ascii=False) + '\n'])
|
||||
|
||||
def main():
|
||||
global_config = config['Global']
|
||||
|
@ -114,7 +137,7 @@ def main():
|
|||
|
||||
warmup_times = 0
|
||||
count_t = []
|
||||
with open(save_res_path, "wb") as fout:
|
||||
with open(save_res_path, "w") as fout:
|
||||
with open(config['Global']['infer_img'], "rb") as f:
|
||||
lines = f.readlines()
|
||||
for index, data_line in enumerate(lines):
|
||||
|
@ -139,6 +162,8 @@ def main():
|
|||
node = F.softmax(node, -1)
|
||||
count_t.append(time.time() - st)
|
||||
draw_kie_result(batch, node, idx_to_cls, index)
|
||||
write_kie_result(fout, node, data)
|
||||
fout.close()
|
||||
logger.info("success!")
|
||||
logger.info("It took {} s for predict {} images.".format(
|
||||
np.sum(count_t), len(count_t)))
|
||||
|
|
|
@ -39,7 +39,7 @@ from ppocr.utils.visual import draw_re_results
|
|||
from ppocr.utils.logging import get_logger
|
||||
from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps, print_dict
|
||||
from tools.program import ArgsParser, load_config, merge_config
|
||||
from tools.infer_vqa_token_ser import SerPredictor
|
||||
from tools.infer_kie_token_ser import SerPredictor
|
||||
|
||||
|
||||
class ReArgsParser(ArgsParser):
|
||||
|
|
Loading…
Reference in New Issue