From e37779abbddd99630063f6c2c16868f3e27c1d87 Mon Sep 17 00:00:00 2001 From: Leif <4603009@qq.com> Date: Wed, 10 Nov 2021 14:51:25 +0800 Subject: [PATCH] Update config and overview.md --- doc/doc_ch/algorithm_overview.md | 7 +++++++ doc/doc_ch/config.md | 29 ++++++++++++++--------------- doc/doc_en/algorithm_overview_en.md | 11 +++++++++-- doc/doc_en/config_en.md | 29 ++++++++++++++--------------- 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 038d62679..f0c16618c 100755 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -20,6 +20,7 @@ PaddleOCR开源的文本检测算法列表: - [x] DB([paper]( https://arxiv.org/abs/1911.08947)) [2](ppocr推荐) - [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1] - [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4] +- [x] PSENet([paper](https://arxiv.org/abs/1903.12473v2)) 在ICDAR2015文本检测公开数据集上,算法效果如下: |模型|骨干网络|precision|recall|Hmean|下载链接| @@ -29,6 +30,8 @@ PaddleOCR开源的文本检测算法列表: |DB|ResNet50_vd|86.41%|78.72%|82.38%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| +|PSE|ResNet50_vd|85.81%|79.53%|82.55%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar)| +|PSE|MobileNetV3|82.20%|70.48%|75.89%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar)| 在Total-text文本检测公开数据集上,算法效果如下: @@ -51,6 +54,8 @@ PaddleOCR基于动态图开源的文本识别算法列表: - [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] - [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5] - [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2))[13] +- [x] SAR([paper](https://arxiv.org/abs/1811.00751v2)) +- [x] SEED([paper](https://arxiv.org/pdf/2005.10977.pdf)) 参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: @@ -66,6 +71,8 @@ PaddleOCR基于动态图开源的文本识别算法列表: |RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)| |SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar) | |NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) | +|SAR|Resnet31| 87.2% | rec_r31_sar | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) | +|SEED|Aster_Resnet| 85.2% | rec_resnet_stn_bilstm_att | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) | diff --git a/doc/doc_ch/config.md b/doc/doc_ch/config.md index 600d5bdb1..ed486aa88 100644 --- a/doc/doc_ch/config.md +++ b/doc/doc_ch/config.md @@ -36,8 +36,8 @@ | pretrained_model | 设置加载预训练模型路径 | ./pretrain_models/CRNN/best_accuracy | \ | | checkpoints | 加载模型参数路径 | None | 用于中断后加载参数继续训练 | | use_visualdl | 设置是否启用visualdl进行可视化log展示 | False | [教程地址](https://www.paddlepaddle.org.cn/paddle/visualdl) | -| infer_img | 设置预测图像路径或文件夹路径 | ./infer_img | \| -| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | \ | +| infer_img | 设置预测图像路径或文件夹路径 | ./infer_img | \|| +| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | 如果为空,则默认使用小写字母+数字作为字典 | | max_text_length | 设置文本最大长度 | 25 | \ | | character_type | 设置字符类型 | ch | en/ch, en时将使用默认dict,ch时使用自定义dict| | use_space_char | 设置是否识别空格 | True | 仅在 character_type=ch 时支持空格 | @@ -191,7 +191,6 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi use_gpu: True epoch_num: 500 ... - character_type: it # 需要识别的语种 character_dict_path: {path/of/dict} # 字典文件所在路径 Train: @@ -212,17 +211,17 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi 目前PaddleOCR支持的多语言算法有: -| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | -| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean | -| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin | -| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar | -| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic | -| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari | +| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | +| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | +| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | +| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | +| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index 53350f308..4d521c9cd 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -24,6 +24,7 @@ PaddleOCR open source text detection algorithms list: - [x] EAST([paper](https://arxiv.org/abs/1704.03155))[2] - [x] DB([paper](https://arxiv.org/abs/1911.08947))[1] - [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4] +- [x] PSENet([paper](https://arxiv.org/abs/1903.12473v2)) On the ICDAR2015 dataset, the text detection result is as follows: @@ -34,6 +35,8 @@ On the ICDAR2015 dataset, the text detection result is as follows: |DB|ResNet50_vd|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| +|PSE|ResNet50_vd|85.81%|79.53%|82.55%|[trianed model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar)| +|PSE|MobileNetV3|82.20%|70.48%|75.89%|[trianed model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar)| On Total-Text dataset, the text detection result is as follows: @@ -56,7 +59,9 @@ PaddleOCR open-source text recognition algorithms list: - [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] - [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] - [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5] -- [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2)) +- [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2))[13] +- [x] SAR([paper](https://arxiv.org/abs/1811.00751v2)) +- [x] SEED([paper](https://arxiv.org/pdf/2005.10977.pdf)) Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow: @@ -71,7 +76,9 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r |RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)| |RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)| |SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar)| -|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) | +|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) | +|SAR|Resnet31| 87.2% | rec_r31_sar | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) | +|SEED|Aster_Resnet| 85.2% | rec_resnet_stn_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) | Please refer to the document for training guide and use of PaddleOCR diff --git a/doc/doc_en/config_en.md b/doc/doc_en/config_en.md index aa78263e4..40295dff2 100644 --- a/doc/doc_en/config_en.md +++ b/doc/doc_en/config_en.md @@ -36,8 +36,8 @@ Take rec_chinese_lite_train_v2.0.yml as an example | pretrained_model | Set the path of the pre-trained model | ./pretrain_models/CRNN/best_accuracy | \ | | checkpoints | set model parameter path | None | Used to load parameters after interruption to continue training| | use_visualdl | Set whether to enable visualdl for visual log display | False | [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) | -| infer_img | Set inference image path or folder path | ./infer_img | \| -| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | \ | +| infer_img | Set inference image path or folder path | ./infer_img | \|| +| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | If the character_dict_path is None, model can only recognize number and lower letters | | max_text_length | Set the maximum length of text | 25 | \ | | character_type | Set character type | ch | en/ch, the default dict will be used for en, and the custom dict will be used for ch | | use_space_char | Set whether to recognize spaces | True | Only support in character_type=ch mode | @@ -196,7 +196,6 @@ Italian is made up of Latin letters, so after executing the command, you will ge use_gpu: True epoch_num: 500 ... - character_type: it # language character_dict_path: {path/of/dict} # path of dict Train: @@ -218,18 +217,18 @@ Italian is made up of Latin letters, so after executing the command, you will ge Currently, the multi-language algorithms supported by PaddleOCR are: -| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | -| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean | -| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin | -| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | ar | -| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic | -| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari | +| Configuration file | Algorithm name | backbone | trans | seq | pred | language | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | +| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | +| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | +| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | +| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)