From 99698aed5480587e3d28e8f253b197c9787aa144 Mon Sep 17 00:00:00 2001 From: an1018 <614803115@qq.com> Date: Wed, 12 Oct 2022 21:28:48 +0800 Subject: [PATCH 01/20] add_pdf2docx_api --- paddleocr.py | 10 ++++ ppstructure/docs/quickstart.md | 13 +++++ ppstructure/docs/quickstart_en.md | 14 +++++ ppstructure/predict_system.py | 16 +++++- ppstructure/recovery/README.md | 82 ++++++++++++++++++++------- ppstructure/recovery/README_ch.md | 76 +++++++++++++++++++------ ppstructure/recovery/requirements.txt | 6 +- ppstructure/utility.py | 5 ++ requirements.txt | 2 +- 9 files changed, 182 insertions(+), 42 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index 6b4de93e9..44308a823 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -663,6 +663,16 @@ def main(): if not flag_gif and not flag_pdf: img = cv2.imread(img_path) + if args.recovery and args.use_pdf2docx_api and flag_pdf: + from pdf2docx.converter import Converter + docx_file = os.path.join(args.output, + '{}.docx'.format(img_name)) + cv = Converter(img_path) + cv.convert(docx_file) + cv.close() + logger.info('docx save to {}'.format(docx_file)) + continue + if not flag_pdf: if img is None: logger.error("error in loading image:{}".format(img_path)) diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md index 60642f78b..3ff325520 100644 --- a/ppstructure/docs/quickstart.md +++ b/ppstructure/docs/quickstart.md @@ -97,6 +97,19 @@ paddleocr --image_dir=ppstructure/docs/table/table.jpg --type=structure --layout #### 2.1.6 版面恢复 +版面恢复分为2种方法,详细介绍请参考:[版面恢复教程](../recovery/README_ch.md): + +- PDF解析 +- OCR技术 + +通过PDF解析(只支持pdf格式的输入): + +```bash +paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure --recovery=true --use_pdf2docx_api=true +``` + +通过OCR技术: + ```bash # 中文测试图 paddleocr --image_dir=ppstructure/docs/table/1.png --type=structure --recovery=true diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md index e0eec4b38..e771877af 100644 --- a/ppstructure/docs/quickstart_en.md +++ b/ppstructure/docs/quickstart_en.md @@ -98,7 +98,21 @@ Key information extraction does not currently support use by the whl package. Fo #### 2.1.6 layout recovery + +Two layout recovery methods are provided, For detailed usage tutorials, please refer to: [Layout Recovery](../recovery/README.md). 
+ +- PDF parse +- OCR + +Recovery by using PDF parse (only support pdf as input): + +```bash +paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure --recovery=true --use_pdf2docx_api=true ``` + +Recovery by using OCR: + +```bash paddleocr --image_dir=ppstructure/docs/table/1.png --type=structure --recovery=true --lang='en' ``` diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index 417002d1e..bb061c998 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -216,16 +216,26 @@ def main(args): image_file_list = image_file_list image_file_list = image_file_list[args.process_id::args.total_process_num] - structure_sys = StructureSystem(args) + if not args.use_pdf2docx_api: + structure_sys = StructureSystem(args) + save_folder = os.path.join(args.output, structure_sys.mode) + os.makedirs(save_folder, exist_ok=True) img_num = len(image_file_list) - save_folder = os.path.join(args.output, structure_sys.mode) - os.makedirs(save_folder, exist_ok=True) for i, image_file in enumerate(image_file_list): logger.info("[{}/{}] {}".format(i, img_num, image_file)) img, flag_gif, flag_pdf = check_and_read(image_file) img_name = os.path.basename(image_file).split('.')[0] + if args.recovery and args.use_pdf2docx_api and flag_pdf: + from pdf2docx.converter import Converter + docx_file = os.path.join(args.output, '{}.docx'.format(img_name)) + cv = Converter(image_file) + cv.convert(docx_file) + cv.close() + logger.info('docx save to {}'.format(docx_file)) + continue + if not flag_gif and not flag_pdf: img = cv2.imread(image_file) diff --git a/ppstructure/recovery/README.md b/ppstructure/recovery/README.md index 0e06c6547..41fb3e45b 100644 --- a/ppstructure/recovery/README.md +++ b/ppstructure/recovery/README.md @@ -6,18 +6,39 @@ English | [简体中文](README_ch.md) - [2. Install](#2) - [2.1 Install PaddlePaddle](#2.1) - [2.2 Install PaddleOCR](#2.2) -- [3. Quick Start](#3) - - [3.1 Download models](#3.1) - - [3.2 Layout recovery](#3.2) -- [4. More](#4) +- [3. Quick Start using PDF parse](#3) +- [4. Quick Start using OCR](#4) + - [4.1 Download models](#4.1) + - [4.2 Layout recovery](#4.2) +- [5. More](#5) ## 1. Introduction -Layout recovery means that after OCR recognition, the content is still arranged like the original document pictures, and the paragraphs are output to word document in the same order. +The layout recovery module is used to restore the image or pdf to an +editable Word file consistent with the original image layout. -Layout recovery combines [layout analysis](../layout/README.md)、[table recognition](../table/README.md) to better recover images, tables, titles, etc. supports input files in PDF and document image formats in Chinese and English. The following figure shows the effect of restoring the layout of English and Chinese documents: +Two layout recovery methods are provided: + +- PDF parse: Python based PDF to word library [pdf2docx] (https://github.com/dothinking/pdf2docx) is optimized, the method extracts data from PDF with PyMuPDF, then parse layout with rule, finally, generate docx with python-docx. + +- OCR: Layout recovery combines [layout analysis](../layout/README.md)、[table recognition](../table/README.md) to better recover images, tables, titles, etc. supports input files in PDF and document image formats in Chinese and English. 
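+
+As a minimal sketch, the whole PDF parse route reduces to the `pdf2docx` `Converter` call that this patch wraps in `paddleocr.py` and `predict_system.py` (the file paths below are placeholders):
+
+```python
+from pdf2docx.converter import Converter
+
+pdf_file = 'ppstructure/recovery/UnrealText.pdf'  # input must be a pdf file
+docx_file = 'output/UnrealText.docx'              # recovered, editable word file
+
+cv = Converter(pdf_file)
+cv.convert(docx_file)  # parse the pdf layout and rebuild it with python-docx
+cv.close()
+```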
+
+The input formats and application scenarios of the two methods are as follows:
+
+| method | input formats | application scenarios / known problems |
+| :-----: | :----------: | :----------------------------------------------------------: |
+| PDF parse | pdf | Advantages: better recovery of non-paper documents, and the content of each page stays on that page after restoration<br>Disadvantages: English characters in some Chinese documents come out garbled, some content still overflows the current page, a whole page may be restored as one table, and some pictures are recovered poorly |
+| OCR technique | pdf, image | Advantages: more suitable for recovering the body of paper-style documents, and the OCR recognition accuracy is better<br>Disadvantages: the recovery is currently rule-based, the typesetting of the content (spacing, fonts, etc.) needs further improvement, and the quality of layout recovery depends on the layout analysis results |
+
+The following figure shows the effect of restoring a document layout with the PDF parse method:
+
+<div align="center">
+ +
+ +The following figures show the effect of restoring the layout of English and Chinese documents by using OCR technique:
@@ -26,6 +47,8 @@ Layout recovery combines [layout analysis](../layout/README.md)、[table recogni
+ + ## 2. Install @@ -61,9 +84,11 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR # Note: Code cloud hosting code may not be able to synchronize the update of this github project in real time, there is a delay of 3 to 5 days, please use the recommended method first. ```` -- **(2) Install recovery's `requirements`** +- **(2) Install recovery `requirements`** -The layout restoration is exported as docx and PDF files, so python-docx and docx2pdf API need to be installed, and PyMuPDF api([requires Python >= 3.7](https://pypi.org/project/PyMuPDF/)) need to be installed to process the input files in pdf format. +The layout restoration is exported as docx files, so python-docx API need to be installed, and PyMuPDF api([requires Python >= 3.7](https://pypi.org/project/PyMuPDF/)) need to be installed to process the input files in pdf format. And if using pdf parse method, we need to install pdf2docx api. + +Install all the libraries by running the following command: ```bash python3 -m pip install -r ppstructure/recovery/requirements.txt @@ -71,7 +96,28 @@ python3 -m pip install -r ppstructure/recovery/requirements.txt -## 3. Quick Start +## 3. Quick Start using PDF parse + +`use_pdf2docx_api` use PDF parse for layout recovery, The whl package is also provided for quick use, follow the above code, for more infomation please refer to [quickstart](../docs/quickstart_en.md) for details. + +```bash +# install paddleocr +pip3 install "paddleocr>=2.6" +paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure --recovery=true --use_pdf2docx_api=true +``` + +Command line: + +```bash +python3 predict_system.py \ + --image_dir=ppstructure/recovery/UnrealText.pdf \ + --recovery=True \ + --use_pdf2docx_api=True \ + --output=../output/ +``` + + +## 4. Quick Start using OCR Through layout analysis, we divided the image/PDF documents into regions, located the key regions, such as text, table, picture, etc., and recorded the location, category, and regional pixel value information of each region. Different regions are processed separately, where: @@ -88,8 +134,8 @@ The whl package is also provided for quick use, follow the above code, for more paddleocr --image_dir=ppstructure/docs/table/1.png --type=structure --recovery=true --lang='en' ``` - -### 3.1 Download models + +### 4.1 Download models If input is English document, download English models: @@ -111,10 +157,10 @@ tar xf picodet_lcnet_x1_0_fgd_layout_infer.tar cd .. 
``` If input is Chinese document,download Chinese models: -[Chinese and English ultra-lightweight PP-OCRv3 model](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/README.md#pp-ocr-series-model-listupdate-on-september-8th)、[表格识别模型](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md#22-表格识别模型)、[版面分析模型](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md#1-版面分析模型) +[Chinese and English ultra-lightweight PP-OCRv3 model](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/README.md#pp-ocr-series-model-listupdate-on-september-8th)、[table recognition model](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md#22-表格识别模型)、[layout analysis model](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md#1-版面分析模型) - -### 3.2 Layout recovery + +### 4.2 Layout recovery ```bash @@ -129,7 +175,6 @@ python3 predict_system.py \ --layout_dict_path=../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt \ --vis_font_path=../doc/fonts/simfang.ttf \ --recovery=True \ - --save_pdf=False \ --output=../output/ ``` @@ -137,7 +182,7 @@ After running, the docx of each picture will be saved in the directory specified Field: -- image_dir:test file测试文件, can be picture, picture directory, pdf file, pdf file directory +- image_dir:test file, can be picture, picture directory, pdf file, pdf file directory - det_model_dir:OCR detection model path - rec_model_dir:OCR recognition model path - rec_char_dict_path:OCR recognition dict path. If the Chinese model is used, change to "../ppocr/utils/ppocr_keys_v1.txt". And if you trained the model on your own dataset, change to the trained dictionary @@ -146,12 +191,11 @@ Field: - layout_model_dir:layout analysis model path - layout_dict_path:layout analysis dict path. If the Chinese model is used, change to "../ppocr/utils/dict/layout_dict/layout_cdla_dict.txt" - recovery:whether to enable layout of recovery, default False -- save_pdf:when recovery file, whether to save pdf file, default False - output:save the recovery result path - + -## 4. More +## 5. More For training, evaluation and inference tutorial for text detection models, please refer to [text detection doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/detection_en.md). diff --git a/ppstructure/recovery/README_ch.md b/ppstructure/recovery/README_ch.md index bc8913adc..eaa5260b5 100644 --- a/ppstructure/recovery/README_ch.md +++ b/ppstructure/recovery/README_ch.md @@ -6,19 +6,37 @@ - [2. 安装](#2) - [2.1 安装PaddlePaddle](#2.1) - [2.2 安装PaddleOCR](#2.2) -- [3. 使用](#3) - - [3.1 下载模型](#3.1) - - [3.2 版面恢复](#3.2) -- [4. 更多](#4) - +- [3.使用 PDF解析进行版面恢复](#3) +- [4. 使用 OCR技术进行版面恢复](#4) + - [4.1 下载模型](#4.1) + - [4.2 版面恢复](#4.2) +- [5. 更多](#5) ## 1. 简介 -版面恢复就是在OCR识别后,内容仍然像原文档图片那样排列着,段落不变、顺序不变的输出到word文档中等。 +版面恢复就是将输入的图片、pdf内容仍然像原文档那样排列着,段落不变、顺序不变的输出到word文档中等。 -版面恢复结合了[版面分析](../layout/README_ch.md)、[表格识别](../table/README_ch.md)技术,从而更好地恢复图片、表格、标题等内容,支持中、英文pdf文档、文档图片格式的输入文件,下图分别展示了英文文档和中文文档版面恢复的效果: +提供了2种版面恢复方法: + +- PDF解析:基于Python的pdf转word库[pdf2docx](https://github.com/dothinking/pdf2docx)进行优化,该方法通过PyMuPDF获取页面元素,然后利用规则解析章节、段落、表格等布局及样式,最后通过python-docx将解析的内容元素重建到word文档中。 +- OCR技术:结合[版面分析](../layout/README_ch.md)、[表格识别](../table/README_ch.md)技术,从而更好地恢复图片、表格、标题等内容,支持中、英文pdf文档、文档图片格式的输入文件。 + +2种方法输入格式、适用场景如下: + +| 方法 | 支持输入文件 | 适用场景/存在问题 | +| :-----: | :----------: | :----------------------------------------------------------: | +| PDF解析 | pdf | 优点:非论文文档恢复效果更优、每一页内容恢复后仍在同一页
缺点:部分中文文档中的英文会出现乱码、仍存在内容超出当前页面的情况、整页内容被恢复成表格格式、部分图片恢复效果不佳 |
+| OCR技术 | pdf、图片 | 优点:更适合论文类文档正文内容的恢复、中英文文档OCR识别效果好<br>缺点:目前内容恢复基于规则,内容排版效果(间距、字体等)有待进一步提升,版面恢复效果依赖于版面分析效果 |
+
+下图展示了通过PDF解析进行版面恢复的效果:
+
+<div align="center">
+ +
+ +下图分别展示了通过OCR技术,英文文档和中文文档版面恢复的效果:
@@ -64,7 +82,9 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR - **(2)安装recovery的`requirements`** -版面恢复导出为docx、pdf文件,所以需要安装python-docx、docx2pdf API,同时处理pdf格式的输入文件,需要安装PyMuPDF API([要求Python >= 3.7](https://pypi.org/project/PyMuPDF/))。 +版面恢复导出为docx文件,所以需要安装Python处理word文档的python-docx API,同时处理pdf格式的输入文件,需要安装PyMuPDF API([要求Python >= 3.7](https://pypi.org/project/PyMuPDF/))。使用pdf2docx库解析的方式恢复文档需要安装pdf2docx等。 + +通过如下命令安装全部库: ```bash python3 -m pip install -r ppstructure/recovery/requirements.txt @@ -72,7 +92,29 @@ python3 -m pip install -r ppstructure/recovery/requirements.txt -## 3. 使用 +## 3.使用 PDF解析进行版面恢复 + +`use_pdf2docx_api`表示使用PDF解析的方式进行版面恢复,通过whl包的形式方便快速使用,代码如下,更多信息详见 [quickstart](../docs/quickstart.md)。 + +```bash +# 安装 paddleocr,推荐使用2.6版本 +pip3 install "paddleocr>=2.6" +paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure --recovery=true --use_pdf2docx_api=true +``` + +通过命令行的方式: + +```bash +python3 predict_system.py \ + --image_dir=ppstructure/recovery/UnrealText.pdf \ + --recovery=True \ + --use_pdf2docx_api=True \ + --output=../output/ +``` + + + +## 4.使用 OCR技术进行版面恢复 我们通过版面分析对图片/pdf形式的文档进行区域划分,定位其中的关键区域,如文字、表格、图片等,记录每个区域的位置、类别、区域像素值信息。对不同的区域分别处理,其中: @@ -86,6 +128,8 @@ python3 -m pip install -r ppstructure/recovery/requirements.txt 提供如下代码实现版面恢复,也提供了whl包的形式方便快速使用,代码如下,更多信息详见 [quickstart](../docs/quickstart.md)。 ```bash +# 安装 paddleocr,推荐使用2.6版本 +pip3 install "paddleocr>=2.6" # 中文测试图 paddleocr --image_dir=ppstructure/docs/table/1.png --type=structure --recovery=true # 英文测试图 @@ -94,9 +138,9 @@ paddleocr --image_dir=ppstructure/docs/table/1.png --type=structure --recovery=t paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure --recovery=true --lang='en' ``` - + -### 3.1 下载模型 +### 4.1 下载模型 如果输入为英文文档类型,下载OCR检测和识别、版面分析、表格识别的英文模型 @@ -122,9 +166,9 @@ cd .. [PP-OCRv3中英文超轻量文本检测和识别模型](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/README_ch.md#pp-ocr%E7%B3%BB%E5%88%97%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8%E6%9B%B4%E6%96%B0%E4%B8%AD)、[表格识别模型](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md#22-表格识别模型)、[版面分析模型](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md#1-版面分析模型) - + -### 3.2 版面恢复 +### 4.2 版面恢复 使用下载的模型恢复给定文档的版面,以英文模型为例,执行如下命令: @@ -140,7 +184,6 @@ python3 predict_system.py \ --layout_dict_path=../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt \ --vis_font_path=../doc/fonts/simfang.ttf \ --recovery=True \ - --save_pdf=False \ --output=../output/ ``` @@ -157,12 +200,11 @@ python3 predict_system.py \ - layout_model_dir:版面分析模型路径 - layout_dict_path:版面分析字典,如果更换为中文模型,需要更改为"../ppocr/utils/dict/layout_dict/layout_cdla_dict.txt" - recovery:是否进行版面恢复,默认False -- save_pdf:进行版面恢复导出docx文档的同时,是否保存为pdf文件,默认为False - output:版面恢复结果保存路径 - + -## 4. 更多 +## 5. 
更多 关于OCR检测模型的训练评估与推理,请参考:[文本检测教程](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/detection.md) diff --git a/ppstructure/recovery/requirements.txt b/ppstructure/recovery/requirements.txt index 7ddc33913..4e4239a14 100644 --- a/ppstructure/recovery/requirements.txt +++ b/ppstructure/recovery/requirements.txt @@ -1,3 +1,5 @@ python-docx -PyMuPDF -beautifulsoup4 \ No newline at end of file +PyMuPDF==1.19.0 +beautifulsoup4 +fonttools>=4.24.0 +fire>=0.3.0 \ No newline at end of file diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 7f8a06d2e..d909f1a8a 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -93,6 +93,11 @@ def init_args(): type=str2bool, default=False, help='Whether to enable layout of recovery') + parser.add_argument( + "--use_pdf2docx_api", + type=str2bool, + default=False, + help='Whether to use pdf2docx api') return parser diff --git a/requirements.txt b/requirements.txt index 7a018b509..c90ca3771 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,4 @@ premailer openpyxl attrdict Polygon3 -PyMuPDF==1.18.7 +PyMuPDF==1.19.0 From 8273983a9728342fa23506a4e2892263a1b68fd7 Mon Sep 17 00:00:00 2001 From: an1018 <614803115@qq.com> Date: Wed, 12 Oct 2022 21:32:31 +0800 Subject: [PATCH 02/20] add_pdf2docx_api --- ppstructure/recovery/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ppstructure/recovery/requirements.txt b/ppstructure/recovery/requirements.txt index 4e4239a14..d67e0a95a 100644 --- a/ppstructure/recovery/requirements.txt +++ b/ppstructure/recovery/requirements.txt @@ -2,4 +2,5 @@ python-docx PyMuPDF==1.19.0 beautifulsoup4 fonttools>=4.24.0 -fire>=0.3.0 \ No newline at end of file +fire>=0.3.0 +pdf2docx==0.0.0 \ No newline at end of file From d58c70223e57a004d55d54cc10712905140f3b4b Mon Sep 17 00:00:00 2001 From: an1018 <614803115@qq.com> Date: Fri, 14 Oct 2022 18:45:39 +0800 Subject: [PATCH 03/20] add_pdf2docx_api --- ppstructure/recovery/README.md | 9 ++++++++- ppstructure/recovery/README_ch.md | 9 ++++++++- ppstructure/recovery/requirements.txt | 3 +-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ppstructure/recovery/README.md b/ppstructure/recovery/README.md index 41fb3e45b..209c995f8 100644 --- a/ppstructure/recovery/README.md +++ b/ppstructure/recovery/README.md @@ -86,7 +86,7 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR - **(2) Install recovery `requirements`** -The layout restoration is exported as docx files, so python-docx API need to be installed, and PyMuPDF api([requires Python >= 3.7](https://pypi.org/project/PyMuPDF/)) need to be installed to process the input files in pdf format. And if using pdf parse method, we need to install pdf2docx api. +The layout restoration is exported as docx files, so python-docx API need to be installed, and PyMuPDF api([requires Python >= 3.7](https://pypi.org/project/PyMuPDF/)) need to be installed to process the input files in pdf format. Install all the libraries by running the following command: @@ -94,6 +94,13 @@ Install all the libraries by running the following command: python3 -m pip install -r ppstructure/recovery/requirements.txt ```` + And if using pdf parse method, we need to install pdf2docx api. + +```bash +wget https://paddleocr.bj.bcebos.com/whl/pdf2docx-0.0.0-py3-none-any.whl +pip3 install pdf2docx-0.0.0-py3-none-any.whl +``` + ## 3. 
Quick Start using PDF parse diff --git a/ppstructure/recovery/README_ch.md b/ppstructure/recovery/README_ch.md index eaa5260b5..5ef823d43 100644 --- a/ppstructure/recovery/README_ch.md +++ b/ppstructure/recovery/README_ch.md @@ -82,7 +82,7 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR - **(2)安装recovery的`requirements`** -版面恢复导出为docx文件,所以需要安装Python处理word文档的python-docx API,同时处理pdf格式的输入文件,需要安装PyMuPDF API([要求Python >= 3.7](https://pypi.org/project/PyMuPDF/))。使用pdf2docx库解析的方式恢复文档需要安装pdf2docx等。 +版面恢复导出为docx文件,所以需要安装Python处理word文档的python-docx API,同时处理pdf格式的输入文件,需要安装PyMuPDF API([要求Python >= 3.7](https://pypi.org/project/PyMuPDF/))。 通过如下命令安装全部库: @@ -90,6 +90,13 @@ git clone https://gitee.com/paddlepaddle/PaddleOCR python3 -m pip install -r ppstructure/recovery/requirements.txt ``` +使用pdf2docx库解析的方式恢复文档需要安装优化的pdf2docx。 + +```bash +wget https://paddleocr.bj.bcebos.com/whl/pdf2docx-0.0.0-py3-none-any.whl +pip3 install pdf2docx-0.0.0-py3-none-any.whl +``` + ## 3.使用 PDF解析进行版面恢复 diff --git a/ppstructure/recovery/requirements.txt b/ppstructure/recovery/requirements.txt index d67e0a95a..4e4239a14 100644 --- a/ppstructure/recovery/requirements.txt +++ b/ppstructure/recovery/requirements.txt @@ -2,5 +2,4 @@ python-docx PyMuPDF==1.19.0 beautifulsoup4 fonttools>=4.24.0 -fire>=0.3.0 -pdf2docx==0.0.0 \ No newline at end of file +fire>=0.3.0 \ No newline at end of file From f3f473d3f9b91e06d3c0122eff10e03030729d13 Mon Sep 17 00:00:00 2001 From: dorren Date: Sat, 15 Oct 2022 20:27:05 +0800 Subject: [PATCH 04/20] update CAN model --- .pre-commit-config.yaml | 7 +- configs/rec/rec_d28_can.yml | 114 +++++++ doc/doc_ch/algorithm_rec_can.md | 170 ++++++++++ doc/doc_en/algorithm_rec_can_en.md | 115 +++++++ doc/imgs_hme/hme_00.jpg | Bin 0 -> 11789 bytes doc/imgs_hme/hme_01.jpg | Bin 0 -> 15278 bytes doc/imgs_hme/hme_02.jpg | Bin 0 -> 4928 bytes ppocr/data/__init__.py | 3 +- ppocr/data/collate_fn.py | 46 +++ ppocr/data/hmer_dataset.py | 99 ++++++ ppocr/data/imaug/__init__.py | 2 +- ppocr/data/imaug/rec_img_aug.py | 30 ++ ppocr/losses/__init__.py | 3 +- ppocr/losses/rec_can_loss.py | 61 ++++ ppocr/metrics/__init__.py | 4 +- ppocr/metrics/rec_metric.py | 71 +++++ ppocr/modeling/backbones/__init__.py | 4 +- ppocr/modeling/backbones/rec_densenet.py | 135 ++++++++ ppocr/modeling/heads/__init__.py | 3 +- ppocr/modeling/heads/rec_can_head.py | 294 ++++++++++++++++++ ppocr/optimizer/learning_rate.py | 43 ++- ppocr/optimizer/lr_scheduler.py | 60 ++++ ppocr/postprocess/__init__.py | 3 +- ppocr/postprocess/rec_postprocess.py | 33 ++ ppocr/utils/dict/latex_symbol_dict.txt | 111 +++++++ test_tipc/configs/rec_d28_can/rec_d28_can.yml | 114 +++++++ .../rec_d28_can/train_infer_python.txt | 53 ++++ tools/eval.py | 14 +- tools/export_model.py | 11 + tools/infer/predict_rec.py | 69 ++++ tools/infer/utility.py | 1 + tools/infer_rec.py | 7 + tools/program.py | 15 +- 33 files changed, 1678 insertions(+), 17 deletions(-) create mode 100644 configs/rec/rec_d28_can.yml create mode 100644 doc/doc_ch/algorithm_rec_can.md create mode 100644 doc/doc_en/algorithm_rec_can_en.md create mode 100644 doc/imgs_hme/hme_00.jpg create mode 100644 doc/imgs_hme/hme_01.jpg create mode 100644 doc/imgs_hme/hme_02.jpg create mode 100644 ppocr/data/hmer_dataset.py create mode 100644 ppocr/losses/rec_can_loss.py create mode 100644 ppocr/modeling/backbones/rec_densenet.py create mode 100644 ppocr/modeling/heads/rec_can_head.py create mode 100644 ppocr/utils/dict/latex_symbol_dict.txt create mode 100644 test_tipc/configs/rec_d28_can/rec_d28_can.yml create mode 
100644 test_tipc/configs/rec_d28_can/train_infer_python.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1584bc76a..b6a299ba4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,11 @@ +repos: - repo: https://github.com/PaddlePaddle/mirrors-yapf.git - sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 + rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 hooks: - id: yapf files: \.py$ - repo: https://github.com/pre-commit/pre-commit-hooks - sha: a11d9314b22d8f8c7556443875b731ef05965464 + rev: a11d9314b22d8f8c7556443875b731ef05965464 hooks: - id: check-merge-conflict - id: check-symlinks @@ -15,7 +16,7 @@ - id: trailing-whitespace files: \.md$ - repo: https://github.com/Lucas-C/pre-commit-hooks - sha: v1.0.1 + rev: v1.0.1 hooks: - id: forbid-crlf files: \.md$ diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml new file mode 100644 index 000000000..aeaccb6b0 --- /dev/null +++ b/configs/rec/rec_d28_can.yml @@ -0,0 +1,114 @@ +Global: + use_gpu: True + epoch_num: 240 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/can/ + save_epoch_step: 1 + # evaluation is run every 1105 iterations + eval_batch_step: [0, 1105] + cal_metric_during_train: True + pretrained_model: ./output/rec/can/CAN + checkpoints: ./output/rec/can/CAN + save_inference_dir: ./inference/rec_d28_can/ + use_visualdl: False + infer_img: doc/imgs_hme/hme_01.jpeg + # for data or label process + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt + max_text_length: 36 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_can.txt + +Optimizer: + name: Momentum + momentum: 0.9 + clip_norm_global: 100.0 + lr: + name: TwoStepCosine + learning_rate: 0.01 + warmup_epoch: 1 + weight_decay: 0.0001 + +Architecture: + model_type: rec + algorithm: CAN + in_channels: 1 + Transform: + Backbone: + name: DenseNet + growthRate: 24 + reduction: 0.5 + bottleneck: True + use_dropout: True + input_channel: 1 + + Head: + name: CANHead + in_channel: 684 + out_channel: 111 + max_text_length: 36 + ratio: 16 + attdecoder: + is_train: True + input_size: 256 + hidden_size: 256 + encoder_out_channel: 684 + dropout: True + dropout_ratio: 0.5 + word_num: 111 + counting_decoder_out_channel: 111 + attention: + attention_dim: 512 + word_conv_kernel: 1 + +Loss: + name: CANLoss + +PostProcess: + name: SeqLabelDecode + character: 111 + +Metric: + name: CANMetric + main_indicator: exp_rate + +Train: + dataset: + name: HMERDataSet + data_dir: ./train_data/CROHME/training/images/ + transforms: + - DecodeImage: + channel_first: False + - GrayImageChannelFormat: + normalize: True + inverse: True + - KeepKeys: + keep_keys: ['image', 'label'] + label_file_list: ["./train_data/CROHME/training/labels.json"] + loader: + shuffle: True + batch_size_per_card: 2 + drop_last: True + num_workers: 1 + collate_fn: DyMaskCollator + +Eval: + dataset: + name: HMERDataSet + data_dir: ./train_data/CROHME/evaluation/images/ + transforms: + - DecodeImage: + channel_first: False + - GrayImageChannelFormat: + normalize: True + inverse: True + - KeepKeys: + keep_keys: ['image', 'label'] + label_file_list: ["./train_data/CROHME/evaluation/labels.json"] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 + num_workers: 4 + collate_fn: DyMaskCollator diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md new file mode 100644 index 000000000..9585dae0c --- /dev/null +++ b/doc/doc_ch/algorithm_rec_can.md @@ -0,0 +1,170 @@ +# 
手写数学公式识别算法-CAN
+
+- [1. 算法简介](#1)
+- [2. 环境配置](#2)
+- [3. 模型训练、评估、预测](#3)
+  - [3.1 训练](#3-1)
+  - [3.2 评估](#3-2)
+  - [3.3 预测](#3-3)
+- [4. 推理部署](#4)
+  - [4.1 Python推理](#4-1)
+  - [4.2 C++推理](#4-2)
+  - [4.3 Serving服务化部署](#4-3)
+  - [4.4 更多推理部署](#4-4)
+- [5. FAQ](#5)
+
+<a name="1"></a>
+## 1. 算法简介
+
+论文信息:
+> [When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition](https://arxiv.org/abs/2207.11463)
+> Bohan Li, Ye Yuan, Dingkang Liang, Xiao Liu, Zhilong Ji, Jinfeng Bai, Wenyu Liu, Xiang Bai
+> ECCV, 2022
+
+<a name="model"></a>
+
+`CAN`使用CROHME手写公式数据集进行训练,在对应测试集上的精度如下:
+
+|模型 |骨干网络|配置文件|ExpRate|下载链接|
+| ----- | ----- | ----- | ----- | ----- |
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[训练模型](https://paddleocr.bj.bcebos.com/rec_d28_can_train.tar)|
+
+<a name="2"></a>
+## 2. 环境配置
+请先参考[《运行环境准备》](./environment.md)配置PaddleOCR运行环境,参考[《项目克隆》](./clone.md)克隆项目代码。
+
+<a name="3"></a>
+## 3. 模型训练、评估、预测
+
+<a name="3-1"></a>
+### 3.1 模型训练
+
+请参考[文本识别训练教程](./recognition.md)。PaddleOCR对代码进行了模块化,训练`CAN`识别模型时需要**更换配置文件**为`CAN`的[配置文件](../../configs/rec/rec_d28_can.yml)。
+
+#### 启动训练
+
+具体地,在完成数据准备后,便可以启动训练,训练命令如下:
+```shell
+#单卡训练(训练周期长,不建议)
+python3 tools/train.py -c configs/rec/rec_d28_can.yml
+
+#多卡训练,通过--gpus参数指定卡号
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_d28_can.yml
+```
+
+**注意:**
+- 我们提供的数据集,即`CROHME数据集`将手写公式存储为黑底白字的格式,若您自行准备的数据集与之相反,即以白底黑字模式存储,请在训练时做出如下修改:
+```
+python3 tools/train.py -c configs/rec/rec_d28_can.yml \
+    -o Train.dataset.transforms.GrayImageChannelFormat.inverse=False
+```
+
+<a name="3-2"></a>
+### 3.2 评估
+
+可下载已训练完成的[模型文件](#model),使用如下命令进行评估:
+
+```shell
+# 注意将pretrained_model的路径设置为本地路径。
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy
+```
+
+<a name="3-3"></a>
+### 3.3 预测
+
+使用如下命令进行单张图片预测:
+```shell
+# 注意将pretrained_model的路径设置为本地路径。
+python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy
+
+# 预测文件夹下所有图像时,可修改infer_img为文件夹,如 Global.infer_img='./doc/imgs_hme/'。
+```
+
+<a name="4"></a>
+## 4. 
推理部署

+<a name="4-1"></a>
+### 4.1 Python推理
+首先将训练得到的best模型转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/rec_d28_can_train.tar)),可以使用如下命令进行转换:
+
+```shell
+# 注意将pretrained_model的路径设置为本地路径。
+python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
+
+# 目前的静态图模型默认的输出长度最大为36,如果您需要预测更长的序列,请在导出模型时指定其输出序列为合适的值,例如 Architecture.Head.max_text_length=72
+```
+**注意:**
+- 如果您是在自己的数据集上训练的模型,并且调整了字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。
+- 如果您修改了训练时的输入大小,请修改`tools/export_model.py`文件中对应CAN的`infer_shape`。
+
+转换成功后,在目录下有三个文件:
+```
+/inference/rec_d28_can/
+    ├── inference.pdiparams         # 识别inference模型的参数文件
+    ├── inference.pdiparams.info    # 识别inference模型的参数信息,可忽略
+    └── inference.pdmodel           # 识别inference模型的program文件
+```
+
+执行如下命令进行模型推理:
+
+```shell
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+
+# 预测文件夹下所有图像时,可修改image_dir为文件夹,如 --image_dir='./doc/imgs_hme/'。
+
+# 如果您需要在白底黑字的图片上进行预测,请设置 --rec_image_inverse=False
+```
+
+![测试图片样例](../imgs_hme/hme_00.jpg)
+
+执行命令后,上面图像的预测结果(识别的文本)会打印到屏幕上,示例如下:
+```shell
+Predicts of ./doc/imgs_hme/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []]
+```
+
+**注意**:
+
+- 需要注意预测图像为**黑底白字**,即手写公式部分为白色,背景为黑色的图片。
+- 在推理时需要设置参数`rec_char_dict_path`指定字典,如果您修改了字典,请修改该参数为您的字典文件。
+- 如果您修改了预处理方法,需修改`tools/infer/predict_rec.py`中CAN的预处理为您的预处理方法。
+
+<a name="4-2"></a>
+### 4.2 C++推理部署
+
+由于C++预处理后处理还未支持CAN,所以暂未支持
+
+<a name="4-3"></a>
+### 4.3 Serving服务化部署
+
+暂不支持
+
+<a name="4-4"></a>
+### 4.4 更多推理部署
+
+暂不支持
+
+<a name="5"></a>
+## 5. FAQ
+
+1. CROHME数据集来自于[CAN源repo](https://github.com/LBH1024/CAN)。
+
+## 引用
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2207.11463,
+  doi = {10.48550/ARXIV.2207.11463},
+  url = {https://arxiv.org/abs/2207.11463},
+  author = {Li, Bohan and Yuan, Ye and Liang, Dingkang and Liu, Xiao and Ji, Zhilong and Bai, Jinfeng and Liu, Wenyu and Bai, Xiang},
+  keywords = {Computer Vision and Pattern Recognition (cs.CV), Artificial Intelligence (cs.AI), FOS: Computer and information sciences, FOS: Computer and information sciences},
+  title = {When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition},
+  publisher = {arXiv},
+  year = {2022},
+  copyright = {arXiv.org perpetual, non-exclusive license}
+}
+```
diff --git a/doc/doc_en/algorithm_rec_can_en.md b/doc/doc_en/algorithm_rec_can_en.md
new file mode 100644
index 000000000..f2bc645af
--- /dev/null
+++ b/doc/doc_en/algorithm_rec_can_en.md
@@ -0,0 +1,115 @@
+# CAN
+
+- [1. Introduction](#1)
+- [2. Environment](#2)
+- [3. Model Training / Evaluation / Prediction](#3)
+  - [3.1 Training](#3-1)
+  - [3.2 Evaluation](#3-2)
+  - [3.3 Prediction](#3-3)
+- [4. Inference and Deployment](#4)
+  - [4.1 Python Inference](#4-1)
+  - [4.2 C++ Inference](#4-2)
+  - [4.3 Serving](#4-3)
+  - [4.4 More](#4-4)
+- [5. FAQ](#5)
+
+<a name="1"></a>
+## 1. 
Introduction
+
+Paper:
+> [When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition](https://arxiv.org/abs/2207.11463)
+> Bohan Li, Ye Yuan, Dingkang Liang, Xiao Liu, Zhilong Ji, Jinfeng Bai, Wenyu Liu, Xiang Bai
+> ECCV, 2022
+
+Training on the CROHME handwritten mathematical expression recognition dataset and evaluating on its test set, the algorithm reproduction effect is as follows:
+
+|Model|Backbone|config|ExpRate|Download link|
+| --- | --- | --- | --- | --- |
+|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|coming soon|
+
+<a name="2"></a>
+## 2. Environment
+Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
+
+<a name="3"></a>
+## 3. Model Training / Evaluation / Prediction
+
+Please refer to [Text Recognition Tutorial](./recognition_en.md). PaddleOCR modularizes the code, and training different recognition models only requires **changing the configuration file**.
+
+Training:
+
+Specifically, after the data preparation is completed, the training can be started. The training command is as follows:
+
+```
+#Single GPU training (long training period, not recommended)
+python3 tools/train.py -c configs/rec/rec_d28_can.yml
+
+#Multi GPU training, specify the gpu number through the --gpus parameter
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_d28_can.yml
+```
+
+Evaluation:
+
+```
+# GPU evaluation
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy
+```
+
+Prediction:
+
+```
+# The configuration file used for prediction must match the training
+python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy
+```
+
+<a name="4"></a>
+## 4. Inference and Deployment
+
+<a name="4-1"></a>
+### 4.1 Python Inference
+First, the model saved during CAN text recognition training is converted into an inference model. You can use the following command to convert it:
+
+```
+python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
+```
+
+For CAN text recognition model inference, the following command can be executed:
+
+```
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 132, 519" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+```
+
+<a name="4-2"></a>
+### 4.2 C++ Inference
+
+Not supported
+
+<a name="4-3"></a>
+### 4.3 Serving
+
+Not supported
+
+<a name="4-4"></a>
+### 4.4 More
+
+Not supported
+
+<a name="5"></a>
+## 5. 
FAQ + + +## Citation + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2207.11463, + doi = {10.48550/ARXIV.2207.11463}, + url = {https://arxiv.org/abs/2207.11463}, + author = {Li, Bohan and Yuan, Ye and Liang, Dingkang and Liu, Xiao and Ji, Zhilong and Bai, Jinfeng and Liu, Wenyu and Bai, Xiang}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), Artificial Intelligence (cs.AI), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` diff --git a/doc/imgs_hme/hme_00.jpg b/doc/imgs_hme/hme_00.jpg new file mode 100644 index 0000000000000000000000000000000000000000..66ff27db266b5d4fa05d8acd95ba881bb8a1aec0 GIT binary patch literal 11789 zcmbVyc|4Ts-~Y5&CylZdF_o>dow5}&m25fAv|3|Y$TBf?B-C&hS+j&9I?S=9lE$u( ziLoR}mdJ=1qjW68J=C}}+jBY3_xa=Z$MeVUxs7?5dv4eAS>K=c=MsMwvrzKK>>TY- zYu2DpYrqdmJc_bGtzENvt-h~az1FW=eV16je%*RW2}#L+f2CxkC8cDfBqgQM(lWBE z7x-%fT6V+g2k{#eS{ij`&BrxsHlf6AC@dIU;@>gB4F7$tS-Wn%grpSsUo?22Rvyf` zZXK9O0!#y*js)MM)^C#7ykqZSNre;VrFLFb+;=;@Kw9%iWsB0uK3L26!k^JHvdSu3 zwyJ9D=tW#$H)8I@ z#>FQjCS_#ae~^{^FekULsJP^D>653=s;X;h>*`;=YG|dmwRd#B?RwWgF!*U``11&3 zf<5{5+tl}erf0Z|OFw?{mXTj8Vg^cjRcttv2>PcmY1jYni+WsmPmH=CMlm4X52Rb_ z4Q2#8Az3hqSL!M4jbcRJhg;NZpKsr4sQdC|IIAu}`6x!OuqLBjHDhsSeV-VmNMnum zA_mkJF={P=Y|HhV+r&n{#{IF_+(()q$p=_`NZ}PfYrQhB0PhDk~kM1&sH*+rC zjBwPROx-{6AZPj43FoDF&PCWrv=!o&BH4TssP&Zal^7+vz?Q6@NaN5dgpc?(b1DNK z!lEvw?t`faPaY|R)^CD5-yhG8R~HdlffXNonbd5tH0 z`Z0##AKAJ+Df~rH(cpa0*H?8JyNi5YC3>iIOI+Xe=7yBu$Rt=f7^rBg81??qM4HHb zxK%?RrX)2mpnl0cD1%?mxhM_hiMiCnuv!Kg`?#|aHtAnBC0Uy1*O0o*Vu6;0iU+vMeDdS6%3%@9(`F(~CkuFC05)ea3 zpg7FCA}nfB;dZw%>!=+V^=S=NuEgu{dKOhtfFCr{H=RX|24D6~<-XWKFjLMjeb~xD z@9W;O*)ACuLSq#%VG^l@I*J#e*M;$KM)lulrmz-nsT#EkjXe-5bd`NENH?kWk#19* zgU^F+*pchMO=smsG`u<`y*Ma+RX=mgQ@Dn&AR2*SJbq+ABvUkr8&%+W3G=RrQE|9x zsb~}aC}qQ}vKV!-Vvrnmwbaop=iry?Rd1|-DBGJ0&N^UHNu9z=vMmOkP)aLrJ zvr|R;1;9l*Xh|LmywXI$dEg?Xwi}d@3o)Fq<&dUjg@`N9>@|M*TxTRbd)0airSu;c zSq&Fkajg3k_$iK+LX!aoQ%-@KX*IYBfAXPMaHjycE@In6o$K8bbO~1z*~oGJ?#I`( zw7S#D*!%KV1a$8|k(qi9Ce11F-2{h#ow)Y$C4FKphGuJkn{VC%)C{o{^XH*Jll0@FB-V`Ym`ScsL0!(K}p>E$JJp_rfL zBo8=}mS_;%r1Zq=ETiQ4uKkuRQ3I+%3V#5rG>iEzJM6U{!8FI_4>)};lP?d$aC#z#A{ zbF3-C8XRi|8&6v&&|`~HBSevO2wx{0$NCDVqsxh9Aae8Dc~Q3T8;=d8F8HWrx7|EOJ}OiD35REp2aF# z?+G*x7{}H&H`Sdgw*%@`US!;NS9Mf;TF*avvv7(0j~Erw3@oSxLu1{eRWdoH7x_*E zfg~*OHsdloWP(NL>1pCqG-f07#jWKkx6zUEwk*wy(>Iz+H}uWlXVVYWyK>7}oT(qT{f^2gcXh9mEm+`}_gzS)W>Chbi*}V|cAa+p z2NEa&x7UW3d7s6o=pfNMS_Qtvz`R5COleE6)HF|w+Fah`Smb6;d`2`=J(!>87vgJj zs>jlHi^UUqf{np@-}D^UZUIa!0&93cfqxgHl4eKOeg{oEl3oFB@}e4!@YXetT1<&T`TYEg*;Uv|22kI?~BYN8Dc^#=Lw=Hxj~1 zllPbb>FL^nazM9lLEy(}*!Ee6kzeyI`;fMibO6)|C@-%AUWid??uISzDuW<3Z=cf2!_eO=qEl(mRY) z&2JVdh75oIkLxJ$^Y~l+nIP>cE{WtWaG|+cHdZ;D{s%XEE&?E!&Xtt7oNX@LFM&Fk z&4H??a(t6~URFLZ4A#sr*?U&T04wy^Qv3++0Hy)~!Tupey*mY6qVV@)B*m!d#ShIe z&>kH}q{#|M;QJ&-)cfnuS`(pyC!$+=SI(xhk6B#rty??zLuh=ezCK-|!D^F-)$rj> zB{@!Bzt3nIw$QZnR=kCcQ3?WmIT8R2OAcyvf}x41MmBCvP8E&gc)73_ z=oEk2tHl#!e$I8BUlc>WYsfZPk-tBcv4CicQE#xxw3=msDxQBzuoYPYFCXU;k`_e! 
z4dy8mLhW^9-u6Dtip?+GE^2KpQRWToYt6}UIk7Ep{#^=EL?{Omzoc>p1PC0mM~q4` zO_N9b@U+H{)>1ReD@va@8BRKR*BJSkfy45~YtP>w{qp6A*KZvMzWvU*z8;97m+U@4 z*aTeLRbW(v+-ze+3de85IERYipv%;g?k2O32Y(Vv>hn%#Zg@OCHGVEFZf~!`B=ITk zzJ|gofV;^qH-6JB0RUV%@y1YrTwF>~fEhj{6bdy?VAsd(YKia-^R=~=$a#p8G3BeX zjS!RU3BS2n7Jg$O7Cvr-7}&RfWkl(6rADoLivulAOSVqFy)VgEpDKAez39bB^1PI( zJpHX^!uotI^F1z@fS4|@{oc?k^WcuK+w0hWJ^d~o20(DM{Gs(Rw(^P0Cm#`d-D z_Q@A92pqMCrqV@b!+V{!os{W6YU}WLOgFOtpch~mF)lR!1!NcYwv{>mc+`NkwHOua z3YU6~L==x`$2ha`@}FhS?Iec#tX0z^KJ=LEYP|n;B=vx8@ki%vDc^zIuP)317`^_D z@J_R3xB^c{7;PoTM&s1N(R7X&g)Suw{j?mVa-1HOdCJsqdafE}@+i6=UOk$-XA^AA zRHeR8QN;fBZ?^D?r>8MgCKp zXi(8n=Z?~l4cXf}wBtI|p5EX0>QeHa;^ZUdKlva8Y6;6}=Xw#{VfxrQ-k7ki>0s$= zW~?cC%y{BMwMtCf)939^$KCK+b*h&O_g((hSYMr?lm0bYQnpP0S+OfOmVZP*koC{LiS(-*EE!nF8)nomIrw_%;Sy*3V!`QYhy5Aq_NN_pyt&oB_mRWy9Y))U z?za{PDdtNyqK$immEweaZC3FrERO;MTx zdxzX1x^uW^LIwQg&d9dBcK0aV$ydwmAeOdjqCNeg#YVT_Zj+eL&|YX zw0=e8GkmYC-WMDz$65iVra`P?O7-{K(=ZjJFQ5+GQfJK9G#HcDUYJwm}wW1&QTosm1MN&EAP<9D{v&eJj^gf`N za%ob2t0Q!*21uq*`!4Smc=}IJL*^Xg(?v3-Fj^$Ni3NS8@#ygK+(CY@;5avpJ_b$P z@D$t}COa1QDh5bpJXIUJlA?%8f(@ZLo6 z+Y<&03sbUELUvAF>$WSJ`p_X@6*ge*0LGvx47Oxb;+jnb2E$O50M7!+*qs=)ZBW#$ z2+#?wCQrBiQFo5_0W&96vtbvPjxF*I)XxiurQPr!!jk4SQLB4D)q`O@E-3U8oI<=$ zwW}tFg{fpUTnmQl^Ot<$aC@B^VseGRfnsVzxIkPL z)IuHkE|Ttsb=fM(24uAfLM+d`S--CN=2dLMk69ayyh4NbAMZqbi~Z3(QWD>t`lU;| zrod}5?X6&MA9&Ifq$@)W&DDNaNs$VU z+s>_qH1#tX0oP+)b4||;T!v!As9*}Bi{YsAH;BG?^v;>Gdr{#ru3RaB*4lwL`#k<} z{vQ6JmHn+?fcpI0?Wv`xPq)_ZZ7Sb3?%Rkp(>*Mdf?8y8zG*Q8*oJ^;)qdx6f*`33 zKc)cW(0*gSK&PBdlMftUyN@W^(pQVlv+n4$%XK=M&t+%085lCYJlmq(v|E1{?C?|2 z7xj^*DmVjjR3x{Pvvsl@hp!nDK$CL#w%ncjC zxOA@RCNcKUnZBBxM=ksOfgb`#X*tw%2CUvh*dz$$yUZ%gZ1T@dE#P6+`ZHoI>N5ZK zR8W6FyFc*Z-m$JniT%2J)yf{_$gk0k0?yO&ga+ej>ViXHBzwUXLMla`d^iC9&dU^i z1d@nvI{27rD~dg@UNZjR2~oMEICp<=uy&Ty1?_ql>}S|grYJmGNCT7K1p}0kK{Xhy zE9w}!60S#&!p3wk;^V&#L`jj&MQ9`?xNDlS0SBPF?YNnYrf7ktZ^W4oT^q8v-la zw*_nqOh_t9D{RE1K-I&Y%n@q4i%3FPWGxj=f1^4!o}bNYFi#;Hhn9Fb2li?aucUj~ zrS_>Lo!%(f-}myv>B+QJhLBdUfS8zzWaQv#knD=*GkPecrRX!!LXamA~LTtw<%3He7A0!WedGx8bcSjizE1UFViN@F=Kt z8911swE}W?VmXD33jrLS=cL4kvdJOMx9ph`AX+LxW_eEX$m|j;cO!EdROn zYfYc;z+;MY1@ZHF{*`mSsgh$??jm#`gdM{Ae~n55K!r%YKhpVT1&VtiAU^SIvXIQV zvSfdeNDPp8@Z8R|CHDj=NF`_2X?Lyh*=@&KP2m+jAT*r-ZffGX3Y64%OK>l}=VFUR z>_RJ3c1E6vRWW_Yx=JKHw(~wZ@b!W2?a3_RgjwP@$M1z*rmdRgHH{F6`vD|mKY?ct zEb4UBC_xU#dO)ZOYcaslnG&Ln2p!)i+J!u_8!F6yr?$J?u#i@hDjoDRua+aMDYB^T8#S})9`vF;EgLoqS=>K{#h&DElNw}tO%^(^0G zlFRVCK5&cH6diC07tB99T2ifh@UhRWwn3%qkFr!n}orAhst^;D_NC zCrcAd{;{?QWC$A%zL)RR>?A!KRrB|DXl_wloU4vAMvunTJKVb)v7L|e21)`8Z&{0D zRY&bXUbC@?=66oc6Mw9-0$!Du4U3$uBh6b^uBFbrxHzz2@|L9B;dt1d=MsP}Lj1~5IcxIc%hCzYn)sW{c z#L{-F{_T9ZVjf99{c!Vd^|n`cO8Jl8DD4GPA7Mu4??HrMoZG^@CNzsKnhC3 zGXhKxcxjq780UFUuQ?hrRr@;m>6Y!g)9)GY%$ViB1~N1Q`!}UsHbVAfw@~Br^@O*Y zwF9y8K~}nxXpWIL;`}%~)1j^}Wl(2+eA3n2T;Yw}&yu#y(~E~S-}mL@U39wSM5}>J z2-s$nz!wzv`;d4ZS@^d|V@5-Xn;jdv&iBPX9=U6@n;vd>zSYt3!Zn|a14Wlo-M%_% z`q{hui?fyf=xcP57O!Z_iO#i7LA&@z?nIQ#&KB z28MOly6HvyI@c~wosf~N7ZmqKo5x4Hi0WXcab7?xwQdl(ytR;Zro|s@JHGPm#XP3Mt#DIa{V1HuHhGM|H#{~fBXGh z-QQ07agM)JqD@@E0PybsN_^+w!Jvl`&IA-sK$9_SIn5ReT7u31*`6Cnl4g4SYqwFnBY?8?d+eju@4+wS9 z)<~I1wu^t(UM0bff&olzLtM@?vTHt%Gvvf~_f0S8-|nED3Yo#4XYKs#Z0l3BGb8TV zY+wRL0rsLG2UI5O=RjJOaD|~3qY-pxbJ(qB2_*W%clP!4;|f#SIFq4BPhmT2PF=#}+JH}&4SoKSW2RgGObIxj~3rJ=t{Zs1ri$G34^N+&p2 zp54GtzLVS$;!UqGMlZtaTDKGpn7JG@!qBSt)}N)N|oER z`|T;T2BNm?9TD~ugBaljo3eSpb)!Fpm(dAz3jzf`Zy0pSK({qu97Bsa1e<=pdp>BN zT3MLho_}1gpEy{&IM#!Sd`G+y*7O%9F246%?n<%b8#DSD|>29Ado|@@Z2yuPdR@Zm&6meFyTbzNXt|0*UKZ9s$YVt>2m$FQO1-h&X3WS zxDlv%T8#QUD?;OdDdB{-n`F#enIoROX0BAcV6P6d8VNa3RW+oqzgK_B{4?k0EK 
zra@c3BQG?q=aQVx;xqdSsO#V zeq{7AfVW_D0Q=Q-<_-Cp@HrF+F=F^eB2@w}3%Siz;A474K=yZLJ6stw9+da(*SI~S zB=g;p)sy<5j)mREc|}+V2&;ry^}Umn1l-5PNLth{Lc!*v0tNUH2i&PJrEx6XXjQaY z6N3vwoy+@>tf?hl?PyTItIw2VZq<@9oq6>Z7O{>NW|B{Muh&g>8(Ia4Jm_FrH7~#Q^wutG@}$!YMbGl|@+C#$v|yWxuUgC-cB!?D>BMkg=xou9I9y*zIR2Ran;MMpfpJ1rEy;rJnnvm z*tDfR{(Xr1&Jc?5l`L4lG!h_>zWK75QCF@#Hk?(JkY1EJ@QYhWyGRROkU`2?AlT1u z?m-dKNwJ;4zfl&+F-Du}`1WPL4ajR635W zWY)wB)Nh0AL7B_*<8OsplxaQ-@?@QU6B6b#199qz(u~}D3I5RqxYWZXRyXdLi^zK> z=1X5GSPf?$oAJT#!{5WPG9cJvAqlFY36xq1qusn3E>56Bfosjf!`R=GLV16E`8@5K zeVwE&tn%m>jq2oCUUzbv@G*l;YxcG`=-vf>ix`9n)Bxm@vFA|)iWHGFdmM{Ag-bi~ zx9`Kzp_nWz1NS*%k@J}1WES$pu;?-)+Ur+^rxtW!JiS;)+Ls7@z#!Ka1WM+j^iuvI zXyOsTBFYVzSQ>0=gy_YC4)bS$1xyhrWKT*wE_m{b=J%3|zdirMa{VGZ!n3D&Fazzj z@5zy0883Ky?!Fp3Ej$lMDp6EBsBQou1u~VC1{sH};f{jSE5!(PVv17EM40%he095n z%0siQ%XZ`Jn|F1y)+c@_@VULrSu!k4zBi`~^vYsmM{a_K72k_2gOvt$_Xr%~5K^K^ zR=BHvT+P;x$TKy$uns7&q5iNh`U^wLtnQ=DwlqD+--h?(#f$CXIVVm+-_Y#dR;IdO zBLdQXG)1w-XpW|U;M!CgbQ6B`*4nN*hHkjwj-?%bsm>W4v#+*3sC=L$Poxwk>3y`8 z5DnpKadR}`Q^3CU@tw$rkTBt^uTan`6F8~I(d657Em=`;b5)&FzoITThWU|lk&a0U zt{fwSRJVJt<+|idQr9Anj`xMWRyiIgM%fKrdo*0vP|*BMLLV7FSvrYih22{`~NIRd_`>E`0VK))j0H5y|aGLSW+w!rkgg5`)e+ z_8g|pk?jq0AT5i$=c;*vu0Y=hFANF?I6s7&LlfFJ+yza0rBZ)i!FgdWE|AKwaB~-2~3TwZa1a>D^YRgt=4J7aQvW&L!qEe8YVf2Q7ZvLjAPxxmUEY zhs$ikYz$ylQ3T5CcvlgK`{<4LPOfB502{Y;R@Dk%;R_WOiMee4@&1CUSG9wp_m5?E z#Ep-S#D9+az3QzN0R=+bJ#Yd7kLb9*SzuVwAyr@wIe`18gQg_{u}QqV;Y7h2c&TkA z>go9jHU9NFOO{y(ZY1bMb3aGDY?oUu<2*XTc6&WwJO6VS>j&mGRGY@Z^Fc@pgy<$w z{Y-glbwK4sU{?uaGCI|~_vQEAe^$o;PA8JN$R|Sd zMu_z{1x>r`nYWIA3J*gGYWy4R2IOBQ{VzD*DrtO+tpzTxl(UT=hP{2Fvnl(X`9o$82H8%XE1RUAr1>h;2q)<}GR^Q7uR)&Y|sTeTtG#T_{Iu{Uq z3hy|uRQuK?v2HjNb1^0^BU(mQ@n{&q(34uJt|-rQcK zOql~}kQ^p^2rI9V5c{9%M;1g1v;>iCWy8|c9q3X3@!~e~x6P->!#DKXicf}YNKfC{ zxUY1nh|8Z`?jSI-Cyf#ye;fnDRxV&uw}zJ;H*c74eEM?VS;t$?3Qn(2(z11!j5Bp( zr<$ai9<@C&3o^bFm|I(n>SQ8%=AB~%UWo870f8k{@$4gGudHUsW`q8h^(GziDO@bEd&0*O2m*QQNPZ%}Zbp8hm%q_W7L3T_51G z%O-1+ef<|4d)ESDNE2esMHh`WK#NsrNRp)pO%!sbrDvI`l zsV-RS2u>oyT%$IP-?(N^`M4P@BTAF|N40eU6!vXqW&Pe?^$Lw$I&X_oO}}&o7Cw1{ za{Vqf7Yc0uu<#}AJcVIFV#KgWoeT0%>)|rwb||DeLcH<1XDmK>YV5#0qI^vZhc6lk z;dfeJ3%at{u62@gFRE9J>QIAP#wnHc0)WY6#i$@mY!nutT2F$flo)lpC$tc6Liy1@ z>GYB!@+f9<_X<`)S0-37cq-Tk(I%yK#z6s;IA#DW>>Y7+MA9bwUj#RqKXtY`>0nUU zl(D7mEH(X#D&utOp^2&>x=7`C-*>2Lf$JvO@_L`&kO&Kypu7rJCi;rBHT4j96*#b33(8~ua#3X>O zFrAG-&Qf3?>`K~%1i0ZI!lTs8Es=YT`M568=pw{+<F_{Pb#I44Q*enIR0Ty5#^qyyOJja zVJw)b1huRKO#@2-7O~}T3CGbXmAQe+-YQ&5s;g^QL%$(agVWUL>EP{DpEa)L{!{O6 z;<*MQFQ;f}=U+9{UoAkRen7+A|Kp~r-~<(z+ypod1-ly|Cvflw3Jg;c zuyC3f_0fg{vOqBK*kK0ktNFKo0r~S^(ByZa>pb)+YQFiT2nqYy>LGCIilA#>~Usr=sEZ8(1OA91kR@L!awhD3{>k=P9=ly5|u^ev6P=kGQ*9eATLrh~*Xp3S3<5&o#jv{`FY3dd*s~b>iS$ zDewV`47lf-HQ-KS;5OjX;o$#>wHw7Y?c8U&PS(~;@`Tk%yL0F59UPsU-90?7czOHy2Hgn0 z8A1rXb?0tmRCG*iTuSOcY3Ugc9%km`=H(X@7Cn1jQCUT)u6gykwyC+LwXOZ#`wu<6 zeP8+q28U=&*4Vf4iOKI%)7-iFU%z<^@Z!=k4Y7VjY#4+H`koWF>;L&FOcZ7;BN(5s zG^mxlVVGsep5oiX7{=C`%*eE;gr^4{o9w7HbTBOOlz-D#J~~V@*X}jW9L|&dTGdG? 
zPnOH=2r%P;E8IB@uJU{tAubwtE|BUYp#(0yT`_7^Ju_9vP15M|VaU-3Wu&ku2NQhIw4oBV!bn0UdKnq`F8HTX8e8l_06Y$1yaq>Ee^sotELIHG6B z0b%-kSH;e7gSMZ2v*$OVGmbDl&J17Jj|Q7;^v5tplkQPDmN-cm%M##}Z!9BXaY&*~ zk@R+GV|z}5$`70Vli0(~R#yh}GUBWWYRQ_37RuQV)lQU4VHcU^a<>1|XE=Eov5rgd zVhqG%7**s-Q3jVbiH$!V-?nLVu)45VdDo{(fA^rkyj}NCJjy@(*#6kT>&JWKVv1uW z4U3@0KBx-A-p0j=B!7?1-k~cA?CP4A5z;2jZVcoG$LY=>w|qxepX!jP8y{sZPyFai z^yyHoFc=Qh=Q_XrJr_L_4P>-SSig+8^u|J}YY4@A&W(;>@9fj0;*n0i7U4=0(|&>e z>Mcv2Z&N-^*lo&*PFM8Ycl?8yMI;nLWn^JEw&*B`6Sxq0qim>IBpI}fSR2-7WV6Xv zFB?#?q+>xvd6~bKa_8x#DP(bmcg1R z!A9A3lJPPkqKJ5!i*7;2n{0;9Gk*;b#Toor$@XV-m^3lEqnG}w>Ri&9*P9YJHfr|O zJW1!!M|*X~sjO&$B32|>SOiq)K=m(PM#PK$;-c=!G6UiZX+uOkPIUaGn(9=ugc2Rm zXq%LiXMATCS3S94zfqFXaPD45_e4x3SW3}F#4;i(7+Cvhj63!=$(UOf{f%oFW3t0+ zOh?nYysz>=TUwFx588udmE^OQb1MDXDo@f*TIx^N4H{itM%>f?AFE%^!IVR0Wh9Ek ziFTZUHfm(;EDskeK(VBB@XeXzq08i%3|?v;Z$30gt6k&0y9I6f!(j7j33)z{(nbTQ z@gP`lHf}YWU4^8Mi}D<((RE|+dR=Xeir(CT z-((&C57z$1{n=Zp(lsAuo^h{;IxevZzvG1G;xa-c0en;(308g#Y$J=j1;{)mv#`bA zjP)~iEd1Dq$@Z+R@T0O(-1{OcL%43@9y<7{Wlqdkkl`ahpXl_zi@C7H|vcHO!7}hxMs`|HO0#ReV)U!uEeG*i|^X zeblqo)zpY3(MJMWzw?yJ$XiBKCb4t*mIQI$72!RH<^_x-2|*zJIR5!JUV6$8?-m3d zG2Qm@aA+J>!_{E@)vIo|bK?$ZXGm{JUl(Dvc>BNgefcF6Sfe$t@4u!ooO4J?xRRF- zbGegP*olK;_CLMEb%^_xK2lodQFB6fwq#82MEMlg+#UU3VPREnY<~1rLj?Alkj}RR zBHMyt++Id-&R~Yx$8Np`*S*Na#bB6ML|r1(n>uO_QOuyGbdV6WjJP`IJb3%rB9(^n z7JaObp!x3)uPa^qLf7Ga0u|v6VoPZ=?-sn9RYtQLn+4bO!mOTF3NNSu!rbhWrQBUg z@ws@!F`%q8ml8YS-92&{xhH*3Vx#F!M@~3KB#8z09u($M8EMq2Yr=K zeFhT;?^%~SMo&Uf(5>NS%z?L1WM&vOPH^7$y_Y(>yC&UTPI2Jcn+{1|D{tQG8bIx?_X#+S*sX3Vt*n(A%-yGFuRO^H6Ve~Bp)m4UJ71l2d{S}<`$7Y zVZ@1;!Y2;r`ixxsqzft`Km0Y;Z|OIEF*gXgH8rnpzG*mGw0-n=#DwNeYz&5xhmlxD zctE$I>Vc#N8B!BeG(7lBz2_d zTUsPLc|gdORDv2rWmIn0=z|!K=^X#EnS7wZzHa||x11l4{4aiZNZS%V_Qs#iysn<0QI)Rab-sL`N}HPFOkSeX_02Zp!1_G9T5#K7NF?buhKp9x-@c=s16){y zq}!A_`OxlJx%nI@&A#MMk$UHug)!`yzd}jKu`00WJH7F8=r?Q(n#y>z{RrH+5~;Z% zf=%;03p{(gtf8Vf!tf6YH$4G&4TL2Q6MziG7lyw*-L` zbKr8Y{1BjB6QVXAR%?HYgEhz?3dUYEu7fe*KE(n@E|PbgHXB*?d%Z^DrzO_R5H{E( zZwpP(WRuw}mkxPp$;G@R$#k2`Is6O}Y<&X~4icTru_h1ciKNyrksDx)-9NC2GBQ51 zE8<1gm5SQFr&Y;1qvvEpZyGp1R!Vzl9iPy@;P8IEw06%Jr|mxzHmZw26U28nsL`R` zlIg0~a=5(}d3#B0L`0Tu?T;)b)Y*lV9JV~#>96NJm3p+N&B|h*wVLNYGa(oD{SsnD z(l5Zaj)>Z`j&M(>fxZ3(UQQ7p(QXL&KKL1|+#G!Z$U9bhVlJmPp|fh?R$6P@kISt( zm#gLr^p3f^H(5;rCAERyD|DfHJn!#iMALO(iw{qJH`xs}*3voVY+QQ?ExHj4ALKKZ z5z&HunrXs^d4xh&&O|wckGXF(F>3n4$a#FmSL~nU&Dn1$_rBU&cc*9iuHhRS-s<~rO=A2TH2#f}s~ z?upc9d8D6c*{dD+BEbE;#N5F2GGYe!uoxcb@)p$S@q3#cOLL}~L&es@-JED_pDt0J z*>&%*XV#+^LzW={_pL&_La(An``efwZ)WHXho)E4F8o8xIM$XHy;HRGj=GKeDIXWKw${0`NIOO%0p51FiJiqkB1}v19&5bXd=cs~HvW!atbrnry|3-+d+F?6gkm zTRuy>?#jK%zg`2qN1YZvTShRC1M%2`Uwy$*kb8J8Bg_>=GQ+boXn2XQDS#Gnz)q6L zUx5^VQ#D6f^`f|4Wk-dn4PxM&Go(HcqA^#J;FHixaIMS4quurUgf%==6^KW|h9zZO zRp^nic_P)2)HzibV5oZ?1L7!Sj-EsdY3YLZ7~J7_HIrp1(#Lw4Wlchp4kwNv=~{KI zMP+DUV7Qj$*1s)0Gu!-kXL;AGI~Qee`ST365CADw0xtd&8txE{Lim@6)|^k-OfpS% ztUp^UI_g)r^LtipcWGYSgt5}7_hVO=SbrU;DmcnI=8~6>^gX7>@pcT*{Wf?vrwfEA zaM;bA6)0kO@A~xlxQ#?JPS_NAz0sCu9bttR>CTHOo%lV~%+)(Rl71 zn<9TcC=!9tA_G7#`#K97nw{yTGV&pgXxQA$f|R~mZr$(t zG|XN8#g0y~gs4h)pHRJHY6;)I1gagn4}5vUN+R&*xr7l(0AO89h55itufY6b(I=>q z94|oS{J4d_^V7&GIC#wU*fi}6+pV^W;$cCleigJk>|Ko5YPQ3X+Xz(8G6Li|jd@g# zGm3VHl@-gH9aa=cN&~2BZNLsy4bRH`5E*e%F%(zKN!iL;7xmxwNcfD2E)|!DOpB09 z7ma7eFu>FN9R!2JSs zA8m(K#Fmty;f?hKizB^Bz%GGn-Z5B4Oljk)#6_|hkl=%ZDGR*?YRo`YYTgEDw9Sx- zX+zR9&t$O4G}7+g(%=qdIB*w1G?+T_y}D3qcOs_ZEy?AwXY>8JERY^XWJ8UGsLC=P zJ&6R&wo$2J2jNc39Dy`3VBJH~aH9QCI^WWMHCgSBT|Z?#+t6ChHz21v#k)I?R=OoO zNTebJp0SZrDUu%$$mWV9v^Wr>hDJ0M=0A54QrXVu`@b7K!_6G_EXasYpJPYUlpGd! 
z4BA$dpvIHfVSH~=?=qsMu+2$?5e=Xee2#xlt(8l(2^<&MIt9{&uq|r&0#lvR#`)j;j}Am0;gC90tw>Oh`BTKvZR} zZ$H*Q*ezJTnm+fgR`-R%GVG zpl)`#K#08YNGj}6@e`%9{=biSyT4Y^)9%-EJ(mAavg&r)*rja%YD9k>5av)X<7j-5 zr0a81AN6Hdo4;8bO*5e$mG_lUDq66tD^Kils!phy_h0L9vwAKj^2w9CcawMQx6wtR z>XLYTkwhg&oM#EPPlg(00BF!@;vz{M&O+n-6mE4Dzhl@X!Ca-fSGBiFf!duGyd@x> z{F3q}A%Fa<#tHUh)(6!+4`kkkf{ahoK_E|M3koeWp!e}v;C>=NTitWE=HV;;DGnUad@$w5&Ap5FRBy8Ac?RRAQ%$Scn6J!f!C^*nk;EIC7aY9oJ z(;0f#wT!?nBWRep)xcd)zrkmai#`{Zfdh+a0!YA;ju3Ua+u1vT5KFjt0UCSPIXaph z?5}s)#L$D^#%y-D*2*{jy7Q0@*|#a^NMx-F!<& z(K|@GK?=Te27XjTh;ZG65i`EyLT`j*4|hqo1YPB$lgB^w>bEM0#aaH7(>7yF1y%^| z>MtzCFj7VGFph(!(L~E}2 z9RW6fkvf#6%Bik2AwxbB$2srEFC3jx3uC3681|F?FjKfA8COgPnqmB0MkLab-afJ8 z0dtPXAhnW_0HFL;`0s?ga-J)SJ*=At3ftJovcZ5C0o4a7GK=>8BHWOf;Um@1_|F93 z-I9?Yy?#vU#tmi}!&5-AzVK~e6SD10zMrd|73}5Zmpf%YFWqx8IYH04cdTw$yJA;u zI_*%NdD8W#=MzBfqzo)V*W}1D;xh3VXO`{Gy|V(z7=PaE>hDScuY-9m=_6Xg0&BL` zxy}!Z5sG}{-eD2*R9Hoi)n`&Cuq_K5Y=GwRxg(P^V|;)Qjb|J7`l4imkx05L;c`dI zO)I=qr`+*bnNjbX82sG)r11;Aysiy)dp#USj|)K39Z?NV(rEw=)Tj#i5~x>_;&7Xc zFf1w?hb)UB9xC}RVMHRO>HHoqsk_j;A6faeW^iOwE;QM&$SLpJz{i!O*FO5LK$BIr z9WFk_LEk3o75O=jQi{yna=mFbsb5EMBd1sgOPFzcFFLsn+1i?3xq?`21DVq0>fJyz zk_JIC;v8;eH&UW~1gHg7E#8LJvd^%sbmzS&Lxz(1FtY(g8gAIm*-p+A9DBsNOHv*P zexhYfiLfE{|FFt34?lK3x~n$*tB(TLB_s8!ZO4B0kqoc-+<%9Se~~PBuq3e+l4Tk3 zHx8u#zPMN%_ZyEKo;IjCYwkO6EB+MDmSSM*?DFPug8tuaKWNouj{E&662<|!mwUz;

+UkT z`hIhAb_#1VdA_LuIBJ9!P^uQjFA3hW(B?9G7gaG3t^X_Dyti{UwjRf_8n5oIIyi{A zU!&J5S2pqKdw1)G!1_dcOPWfQkLnzfrpPFaqh6sVH5?Q^76cdhEZn01ZRGY0#yi#a zc!ug+sV%Qf>34|CJAN~!{?xe&Rrwuhc5lbN=fo{Rj1Yl*4X;r61bEtG2!sj71G&dv8$q zqYgE-B=Q+>_Kav40=4w;*qClKcZKwu=kFslGZ*51zcI6?!i`0QthUil#-wRC4;@{V zn(98?ONkfSZ_OW8)6ve@?h)m1(~T1;=PY!g$tA^}I{Z4O-!HSz<)Yd7^kxvfuM&7Iw?gsURvZEkwMxQ6SsLg*e8*l`WpsBzA-yuw(4 z8%t^g-*O8Ox_8(i3qkb@vZ2?WTW(+UxJiq9{<`UJgnZWkKx7V<4r^i90HW7+aL@Xo zDl*z#%}#f#_lb>GGHe_ljjXZ1%@*(|Y)4-WfhhsZzHIDi%CEj6zoKovF zcQhGrs(S=hWRXl68#lC!IGt&OZ9ROGyw$)!vD?f@>27)R+8SgDrHbq0`}yn1o?}0J zHd(!10~ugA7a&-xERM=3BU74)4&|Jm*-UD5!=_%kB^<=`W1D_U(X!LB(#;OqocuI# z>D$cy#T|Rv#j4aN_y;qw_jZ+C0_qMU#J7`VgbfYia3vGpTn3*q!sZJuaXRVIM(V;c z1+uUP+FpE>$HVwO*vzhg*fp*_-p1sUPI;u=x1XwEb%x_&at=SBnKeLm#$Xv2rpcU> z!ZjkvVi4r8!n%4l22PUDdnUd4SWmsWC*EH@p--tayO8o~#2+##)^7UZb*`o+_G#jg zs6Z7}D-;gc8c2>j+utV5V51whfkJgJ(T8ss&2AVOD_L`PK!^0@dR-1w)gJzQitrk* zeF_w+6l;iHloX(J@RyMd(1HxqDD6+$-<}O05lPcv9K`uGtq^M%cK{;}U+HynvEL|s zZku25L|(?Brj=?=>ldiyKoz&H9cr|t z`p?R%H5`VI6cOb){-sZM^b>8l4py&vOCyShLyLX#-xY@hqf?JqDerIgQ#Dn)GHtlI z3}hLsF4(YHB%Rjh(QmYk!E(wWKBx{^w$bmw`_A>@ou+d zB#q+d{af5ZQRbbgV4v_g)dz}$sx8^u#;~-cv5O@PAf^fm`DZ&0ALtonZ+VWAATtOgICh8 z*{9NdV4K;@_O;!Vb;16cN2@iCXhFLRpX^9WHI&UeeHk=E_wQJJasde&ka-GRIy>!8 zvJ}Ybz^@}jKx+jWT(O#aMqVtNojGyaH2n6de@c_<`S_xQmvC%!{k`=YdXDKj8N4`a zt@s7_b?5?nypVWKSY))3zlxyf+$UYwRAT(QY-{uO+`w%wmOkZgK>KV+FHve&tL{JI zX`L2oF>={EI{p1F=pzc&OcKa0jLiznHlUOaD*qUG`mxrQ^r!c>5S=!E(M+|>(>R5#9J z*-^-Mx_Y6@eSD}ey%Ia|@j)!{uXZA2PU|S!*4jzwwD7EVMEICi*X$gTTC>&?qcX+| z6Kxn5mYM7qIKh!zlt=>4B`sjNP-7@baXM>f6{j$EMr`Kheq&);{>bBF$LwYdt%^>~ zJJ;6x?-;$kp=Nu?-%ukO!vNgea2KzYYah!M=hbl1vpEh-JTJne!pLe%HP<5Fx9nO< zt7oB=vhj;L#<_WXy8OA7mJyW6NL75**Ow{%?@LU&Qq?v&LniwFCUtQoz@M%t z-W6!EFz}vbMBITS8Q2R;t@~^uKE1Fc|An1mt?}drEi2V`Kt^x1+dso!zaDel+jeOD z6#J(5{2E>)Jn*kpS0cu{Yn#u~zdK#NB(;A38xOMnbh`3t({?&-1D!4{u?;^PkBU8* zgS@DqM=k4%*^Xh*%ZB55+OW0%m>75TVd0O%Gg{0y8lyjLE*d*D)4eFYI=O_&(EL~7 z3e&W6j`mpHpP0%Im3s-$25-QSW18rHXg+3H1X=0^`d>7kg0p6{eiz0356yQ%hdHm| zu|o63zaBatl)huN-x5gUB4(h*{mX~|G>y)4ghAc17UVS%R|#RwA|=?oa5lPKsV0W# z8szVv=jh$P*D`x*LA*14dWmK+FM+EZF{EkFjEBU#B~!H_UkE->*+hMrF_TnG3=mde z7{xd#fht#WpmB*;4$L~L+{fFUYurzk?buE(7{%e7u_4+|X}MP`b>uQm-d0;w!oHW( z`UQ!ky@ipKq+t+f%b4hxrLH@eDkE8ryGRSUY4q!XZ+lxI8MoY)RF_gZU~;`f4XxIe z#%miWI?XE>BnU7fsSN%Zf&;G=Y@|wX9p*C5^$s7eBe*If8)zn<_5Sh25R`UNj@iqT#NBL6nBAq%d*JCiPt6{KB)Ktq;BH3+|#nIo%Bx zzlULrl)*bEXSM-|O~??))rq9scu#OgMP0y1?cP2sm^%FP;UpVF?Wrurxh}>A`l&3Q zX&-OdZckUnCvn?(k=+f_grKq6L?dJVA-M9)fN1+5q1mp6qfu9EarTF8cgnPR#p{R8 zyH0kfzlt5)oV)XZAa=nL1qvUWCD^Ab<}zxf$6<+K&L z!K%heH@&|D`?^m(4yE_a%G26D($h0IHhxzX3HWLyTeUw(l#W5Hq>J z$ZEObi6q_cxT%bb7&tP=CWl^+m+#LA3G*xQO+V{uJ$c}v@{J!JJ>xFVXc874vDCeo z05W_q30MR_{QFYZ-wmpw&p1vjLGwb>D2!yjiRsbaE_esI` z6@5sIa|w1V$v-r7Lm4m$P?t8Is|SH&?HysMF67gNByR5&1p5>dOct7N%|@36d%a9? 
[binary JPEG data omitted]

diff --git a/doc/imgs_hme/hme_02.jpg b/doc/imgs_hme/hme_02.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ecc760f5382bfe3d94de6141379f6a5a196e8430
GIT binary patch
literal 4928
[binary JPEG data omitted]

diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index b602a346d..1f3de63de 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -37,6 +37,7 @@ from ppocr.data.simple_dataset import SimpleDataSet
 from ppocr.data.lmdb_dataset import LMDBDataSet, LMDBDataSetSR
 from ppocr.data.pgnet_dataset import PGDataSet
 from ppocr.data.pubtab_dataset import PubTabDataSet
+from ppocr.data.hmer_dataset import HMERDataSet

 __all__ = ['build_dataloader', 'transform', 'create_operators']

@@ -55,7 +56,7 @@ def build_dataloader(config, mode, device, logger, seed=None):

     support_dict = [
         'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet',
-        'LMDBDataSetSR'
+        'LMDBDataSetSR', 'HMERDataSet'
     ]
     module_name = config[mode]['dataset']['name']
     assert module_name in support_dict, Exception(
diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
index 0da6060f0..fec1e895f 100644
--- a/ppocr/data/collate_fn.py
+++ b/ppocr/data/collate_fn.py
@@ -70,3 +70,49 @@ class SSLRotateCollate(object):
     def __call__(self, batch):
         output = [np.concatenate(d, axis=0) for d in zip(*batch)]
         return output
+
+
+class DyMaskCollator(object):
+    """
+    batch: [
+        image [batch_size, channel, maxHinbatch, maxWinbatch]
+        image_mask [batch_size, channel, maxHinbatch, maxWinbatch]
+        label [batch_size, maxLabelLen]
+        label_mask [batch_size, maxLabelLen]
+        ...
+    ]
+    """
+
+    def __call__(self, batch):
+        max_width, max_height, max_length = 0, 0, 0
+        bs, channel = len(batch), batch[0][0].shape[0]
+        proper_items = []
+        for item in batch:
+            # skip samples whose padded batch area would exceed the
+            # 1600 * 320 pixel budget
+            if item[0].shape[1] * max_width > 1600 * 320 or item[0].shape[
+                    2] * max_height > 1600 * 320:
+                continue
+            max_height = item[0].shape[1] if item[0].shape[
+                1] > max_height else max_height
+            max_width = item[0].shape[2] if item[0].shape[
+                2] > max_width else max_width
+            max_length = item[1].shape[0] if item[1].shape[
+                0] > max_length else max_length
+            proper_items.append(item)
+
+        images, image_masks = np.zeros(
+            (len(proper_items), channel, max_height, max_width),
+            dtype='float32'), np.zeros(
+                (len(proper_items), 1, max_height, max_width), dtype='float32')
+        labels, label_masks = np.zeros(
+            (len(proper_items), max_length), dtype='int64'), np.zeros(
+                (len(proper_items), max_length), dtype='int64')
+
+        for i in range(len(proper_items)):
+            _, h, w = proper_items[i][0].shape
+            images[i][:, :h, :w] = proper_items[i][0]
+            image_masks[i][:, :h, :w] = 1
+            label_length = proper_items[i][1].shape[0]
+            labels[i][:label_length] = proper_items[i][1]
+            label_masks[i][:label_length] = 1
+
+        return images, image_masks, labels, label_masks
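As a quick illustration of what this collator produces (a sketch only; the sample shapes below are invented), each element of `batch` is an `(image, label)` pair, images are padded to the batch-wide maxima, and the masks mark the valid regions:

```python
import numpy as np

# two dummy (C, H, W) images with int64 label sequences of different lengths
batch = [
    (np.ones((1, 60, 200), dtype='float32'), np.array([5, 9, 3], dtype='int64')),
    (np.ones((1, 80, 150), dtype='float32'), np.array([5, 2], dtype='int64')),
]
images, image_masks, labels, label_masks = DyMaskCollator()(batch)
print(images.shape)               # (2, 1, 80, 200): padded to the batch maxima
print(int(image_masks[0].sum()))  # 12000 = 60 * 200 valid pixels of sample 0
print(label_masks.sum(1))         # [3 2]: the true label lengths
```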
diff --git a/ppocr/data/hmer_dataset.py b/ppocr/data/hmer_dataset.py
new file mode 100644
index 000000000..d5d92f264
--- /dev/null
+++ b/ppocr/data/hmer_dataset.py
@@ -0,0 +1,99 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os, json, random, traceback
+import numpy as np
+
+from PIL import Image
+from paddle.io import Dataset
+
+from .imaug import transform, create_operators
+
+
+class HMERDataSet(Dataset):
+    def __init__(self, config, mode, logger, seed=None):
+        super(HMERDataSet, self).__init__()
+
+        self.logger = logger
+        self.seed = seed
+        self.mode = mode
+
+        global_config = config['Global']
+        dataset_config = config[mode]['dataset']
+        self.data_dir = config[mode]['dataset']['data_dir']
+
+        label_file_list = dataset_config['label_file_list']
+        data_source_num = len(label_file_list)
+        ratio_list = dataset_config.get("ratio_list", [1.0])
+
+        self.data_lines, self.labels = self.get_image_info_list(label_file_list,
+                                                                ratio_list)
+        self.data_idx_order_list = list(range(len(self.data_lines)))
+        # shuffling is controlled by the loader section of the config
+        self.do_shuffle = config[mode]['loader']['shuffle']
+        if self.mode == "train" and self.do_shuffle:
+            self.shuffle_data_random()
+
+        if isinstance(ratio_list, (float, int)):
+            ratio_list = [float(ratio_list)] * int(data_source_num)
+
+        assert len(
+            ratio_list
+        ) == data_source_num, "The length of ratio_list should be the same as the file_list."
+
+        self.ops = create_operators(dataset_config['transforms'], global_config)
+        self.need_reset = True in [x < 1 for x in ratio_list]
+
+    def get_image_info_list(self, file_list, ratio_list):
+        if isinstance(file_list, str):
+            file_list = [file_list]
+        labels = {}
+        for idx, file in enumerate(file_list):
+            with open(file, "r") as f:
+                lines = json.load(f)
+                labels.update(lines)
+        data_lines = [name for name in labels.keys()]
+        return data_lines, labels
+
+    def shuffle_data_random(self):
+        random.seed(self.seed)
+        random.shuffle(self.data_lines)
+        return
+
+    def __len__(self):
+        return len(self.data_idx_order_list)
+
+    def __getitem__(self, idx):
+        file_idx = self.data_idx_order_list[idx]
+        data_name = self.data_lines[file_idx]
+        try:
+            file_name = data_name + '.jpg'
+            img_path = os.path.join(self.data_dir, file_name)
+            if not os.path.exists(img_path):
+                raise Exception("{} does not exist!".format(img_path))
+            with open(img_path, 'rb') as f:
+                img = f.read()
+
+            label = self.labels.get(data_name).split()
+            label = np.array([int(item) for item in label])
+
+            data = {'image': img, 'label': label}
+            outs = transform(data, self.ops)
+        except:
+            self.logger.error(
+                "When parsing line {}, error happened with msg: {}".format(
+                    file_name, traceback.format_exc()))
+            outs = None
+        if outs is None:
+            # during evaluation, we should fix the idx to get same results for many times of evaluation.
+            rnd_idx = np.random.randint(self.__len__())
+            return self.__getitem__(rnd_idx)
+        return outs
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index 93d97446d..a64092286 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -27,7 +27,7 @@ from .make_pse_gt import MakePseGt
 from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
     SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \
     ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \
-    RFLRecResizeImg
+    RFLRecResizeImg, GrayImageChannelFormat
 from .ssl_img_aug import SSLRotateResize
 from .randaugment import RandAugment
 from .copy_paste import CopyPaste
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index e22153bde..bc7fbc604 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -465,6 +465,36 @@ class RobustScannerRecResizeImg(object):
         return data


+class GrayImageChannelFormat(object):
+    """
+    Convert a BGR image to a single-channel gray image: (h, w, 3) -> (1, h, w)
+    Args:
+        normalize: True/False
+            when True, rescale the dynamic range from [0, 255] to [0, 1]
+        inverse: when True, invert the gray image (useful when the model
+            expects white-on-black input)
+    """
+
+    def __init__(self, normalize=True, inverse=False, **kwargs):
+        self.normalize = normalize
+        self.inverse = inverse
+
+    def __call__(self, data):
+        img = data['image']
+        img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        img_single_channel = np.expand_dims(img_single_channel, 0)
+
+        if self.normalize:
+            img_single_channel = img_single_channel / 255.0
+
+        if self.inverse:
+            data['image'] = np.abs(img_single_channel - 1).astype('float32')
+        else:
+            data['image'] = img_single_channel.astype('float32')
+
+        data['src_image'] = img
+        return data
+
+
 def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
     imgC, imgH, imgW_min, imgW_max = image_shape
     h = img.shape[0]
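A minimal usage sketch of the transform above (it relies on `cv2`, which `rec_img_aug.py` already imports): a pure-white BGR image maps to an all-zero tensor when `normalize` and `inverse` are both enabled, the configuration the CAN recipe uses:

```python
import numpy as np

op = GrayImageChannelFormat(normalize=True, inverse=True)
data = op({'image': np.full((32, 32, 3), 255, dtype=np.uint8)})  # white image
print(data['image'].shape)  # (1, 32, 32)
print(data['image'].max())  # 0.0 -- white background becomes 0, ink tends to 1
```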
diff --git a/ppocr/losses/__init__.py b/ppocr/losses/__init__.py
index 6abaa408b..6a34dd1c8 100755
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -40,6 +40,7 @@ from .rec_multi_loss import MultiLoss
 from .rec_vl_loss import VLLoss
 from .rec_spin_att_loss import SPINAttentionLoss
 from .rec_rfl_loss import RFLLoss
+from .rec_can_loss import CANLoss

 # cls loss
 from .cls_loss import ClsLoss
@@ -71,7 +72,7 @@ def build_loss(config):
         'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
         'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss',
         'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss',
-        'SLALoss', 'CTLoss', 'RFLLoss', 'DRRGLoss'
+        'SLALoss', 'CTLoss', 'RFLLoss', 'DRRGLoss', 'CANLoss'
     ]
     config = copy.deepcopy(config)
     module_name = config.pop('name')
diff --git a/ppocr/losses/rec_can_loss.py b/ppocr/losses/rec_can_loss.py
new file mode 100644
index 000000000..a6c655e0e
--- /dev/null
+++ b/ppocr/losses/rec_can_loss.py
@@ -0,0 +1,61 @@
+import paddle
+import paddle.nn as nn
+import numpy as np
+
+
+class CANLoss(nn.Layer):
+    '''
+    CANLoss consists of two parts:
+        word_average_loss: average cross-entropy loss of the predicted symbols
+        counting_loss: smooth L1 loss of the per-symbol counting predictions
+    '''
+
+    def __init__(self):
+        super(CANLoss, self).__init__()
+
+        self.use_label_mask = False
+        self.out_channel = 111
+        self.cross = nn.CrossEntropyLoss(
+            reduction='none') if self.use_label_mask else nn.CrossEntropyLoss()
+        self.counting_loss = nn.SmoothL1Loss(reduction='mean')
+        self.ratio = 16
+
+    def forward(self, preds, batch):
+        word_probs = preds[0]
+        counting_preds = preds[1]
+        counting_preds1 = preds[2]
+        counting_preds2 = preds[3]
+        labels = batch[2]
+        labels_mask = batch[3]
+        counting_labels = gen_counting_label(labels, self.out_channel, True)
+        counting_loss = self.counting_loss(counting_preds1, counting_labels) \
+                        + self.counting_loss(counting_preds2, counting_labels) \
+                        + self.counting_loss(counting_preds, counting_labels)
+
+        word_loss = self.cross(
+            paddle.reshape(word_probs, [-1, word_probs.shape[-1]]),
+            paddle.reshape(labels, [-1]))
+        word_average_loss = paddle.sum(
+            paddle.reshape(word_loss * labels_mask, [-1])) / (
+                paddle.sum(labels_mask) + 1e-10
+            ) if self.use_label_mask else word_loss
+        loss = word_average_loss + counting_loss
+        return {'loss': loss}
+
+
+def gen_counting_label(labels, channel, tag):
+    b, t = labels.shape
+    counting_labels = np.zeros([b, channel])
+
+    if tag:
+        ignore = [0, 1, 107, 108, 109, 110]
+    else:
+        ignore = []
+    for i in range(b):
+        for j in range(t):
+            k = labels[i][j]
+            if k in ignore:
+                continue
+            else:
+                counting_labels[i][k] += 1
+    counting_labels = paddle.to_tensor(counting_labels, dtype='float32')
+    return counting_labels
diff --git a/ppocr/metrics/__init__.py b/ppocr/metrics/__init__.py
index 20aea8b59..5e840a194 100644
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
@@ -22,7 +22,7 @@ import copy
 __all__ = ["build_metric"]

 from .det_metric import DetMetric, DetFCEMetric
-from .rec_metric import RecMetric, CNTMetric
+from .rec_metric import RecMetric, CNTMetric, CANMetric
 from .cls_metric import ClsMetric
 from .e2e_metric import E2EMetric
 from .distillation_metric import DistillationMetric
@@ -38,7 +38,7 @@ def build_metric(config):
     support_dict = [
         "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
         "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
-        'VQAReTokenMetric', 'SRMetric', 'CTMetric', 'CNTMetric'
+        'VQAReTokenMetric', 'SRMetric', 'CTMetric', 'CNTMetric', 'CANMetric'
     ]

     config = copy.deepcopy(config)
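Stepping back to the loss added above: the counting target comes from `gen_counting_label`, which simply histograms each label sequence while skipping non-countable indices. A tiny check (a sketch; a plain numpy array stands in for the batched label tensor here):

```python
import numpy as np

labels = np.array([[5, 5, 9, 0]])  # one eos-padded sample: symbol 5 twice, symbol 9 once
counts = gen_counting_label(labels, channel=111, tag=True)
print(float(counts[0, 5]))  # 2.0
print(float(counts[0, 9]))  # 1.0
print(float(counts[0, 0]))  # 0.0 -- index 0 (eos) is in the ignore list
```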
diff --git a/ppocr/metrics/rec_metric.py b/ppocr/metrics/rec_metric.py
index 4758e71d0..305b913c7 100644
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -13,6 +13,9 @@
 # limitations under the License.

 from rapidfuzz.distance import Levenshtein
+from difflib import SequenceMatcher
+
+import numpy as np

 import string
@@ -106,3 +109,71 @@ class CNTMetric(object):
     def reset(self):
         self.correct_num = 0
         self.all_num = 0
+
+
+class CANMetric(object):
+    def __init__(self, main_indicator='exp_rate', **kwargs):
+        self.main_indicator = main_indicator
+        self.word_right = []
+        self.exp_right = []
+        self.word_total_length = 0
+        self.exp_total_num = 0
+        self.word_rate = 0
+        self.exp_rate = 0
+        self.reset()
+        self.epoch_reset()
+
+    def __call__(self, preds, batch, **kwargs):
+        # the trainer passes `epoch_reset=True` on the first batch of an epoch
+        epoch_reset = kwargs.get('epoch_reset', False)
+        if epoch_reset:
+            self.epoch_reset()
+        word_probs = preds
+        word_label, word_label_mask = batch
+        line_right = 0
+        if word_probs is not None:
+            word_pred = word_probs.argmax(2)
+            word_pred = word_pred.cpu().detach().numpy()
+            word_scores = [
+                SequenceMatcher(
+                    None,
+                    s1[:int(np.sum(s3))],
+                    s2[:int(np.sum(s3))],
+                    autojunk=False).ratio() * (
+                        len(s1[:int(np.sum(s3))]) + len(s2[:int(np.sum(s3))])) /
+                len(s1[:int(np.sum(s3))]) / 2
+                for s1, s2, s3 in zip(word_label, word_pred, word_label_mask)
+            ]
+            batch_size = len(word_scores)
+            for i in range(batch_size):
+                if word_scores[i] == 1:
+                    line_right += 1
+            self.word_rate = np.mean(word_scores)  # float
+            self.exp_rate = line_right / batch_size  # float
+            exp_length, word_length = word_label.shape[:2]
+            self.word_right.append(self.word_rate * word_length)
+            self.exp_right.append(self.exp_rate * exp_length)
+            self.word_total_length = self.word_total_length + word_length
+            self.exp_total_num = self.exp_total_num + exp_length
+
+    def get_metric(self):
+        """
+        return {
+            'word_rate': 0,
+            "exp_rate": 0,
+        }
+        """
+        cur_word_rate = sum(self.word_right) / self.word_total_length
+        cur_exp_rate = sum(self.exp_right) / self.exp_total_num
+        self.reset()
+        return {'word_rate': cur_word_rate, "exp_rate": cur_exp_rate}
+
+    def reset(self):
+        self.word_rate = 0
+        self.exp_rate = 0
+
+    def epoch_reset(self):
+        self.word_right = []
+        self.exp_right = []
+        self.word_total_length = 0
+        self.exp_total_num = 0
diff --git a/ppocr/modeling/backbones/__init__.py b/ppocr/modeling/backbones/__init__.py
index 84892fa9c..e2c2e9c4a 100755
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -43,10 +43,12 @@ def build_backbone(config, model_type):
         from .rec_svtrnet import SVTRNet
         from .rec_vitstr import ViTSTR
         from .rec_resnet_rfl import ResNetRFL
+        from .rec_densenet import DenseNet
         support_dict = [
             'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
             'ResNet31', 'ResNet45', 'ResNet_ASTER', 'MicroNet',
-            'EfficientNetb3_PREN', 'SVTRNet', 'ViTSTR', 'ResNet32', 'ResNetRFL'
+            'EfficientNetb3_PREN', 'SVTRNet', 'ViTSTR', 'ResNet32', 'ResNetRFL',
+            'DenseNet'
         ]
     elif model_type == 'e2e':
         from .e2e_resnet_vd_pg import ResNet
diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py
new file mode 100644
index 000000000..d3391d408
--- /dev/null
+++ b/ppocr/modeling/backbones/rec_densenet.py
@@ -0,0 +1,135 @@
+import math
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+class Bottleneck(nn.Layer):
+    '''
+    ratio: 16
+    growthRate: 24
+    reduction: 0.5
+    bottleneck: True
+    use_dropout: True
+    '''
+
+    def __init__(self, nChannels, growthRate, use_dropout):
+        super(Bottleneck, self).__init__()
+        interChannels = 4 * growthRate
+        self.bn1 =
nn.BatchNorm2D(interChannels) + self.conv1 = nn.Conv2D( + nChannels, interChannels, kernel_size=1, + bias_attr=None) # Xavier initialization + self.bn2 = nn.BatchNorm2D(growthRate) + self.conv2 = nn.Conv2D( + interChannels, growthRate, kernel_size=3, padding=1, + bias_attr=None) # Xavier initialization + self.use_dropout = use_dropout + self.dropout = nn.Dropout(p=0.2) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + if self.use_dropout: + out = self.dropout(out) + out = F.relu(self.bn2(self.conv2(out))) + if self.use_dropout: + out = self.dropout(out) + out = paddle.concat([x, out], 1) + return out + + +class SingleLayer(nn.Layer): + def __init__(self, nChannels, growthRate, use_dropout): + super(SingleLayer, self).__init__() + self.bn1 = nn.BatchNorm2D(nChannels) + self.conv1 = nn.Conv2D( + nChannels, growthRate, kernel_size=3, padding=1, bias_attr=False) + + self.use_dropout = use_dropout + self.dropout = nn.Dropout(p=0.2) + + def forward(self, x): + out = self.conv1(F.relu(x)) + if self.use_dropout: + out = self.dropout(out) + + out = paddle.concat([x, out], 1) + return out + + +class Transition(nn.Layer): + def __init__(self, nChannels, out_channels, use_dropout): + super(Transition, self).__init__() + self.bn1 = nn.BatchNorm2D(out_channels) + self.conv1 = nn.Conv2D( + nChannels, out_channels, kernel_size=1, bias_attr=False) + self.use_dropout = use_dropout + self.dropout = nn.Dropout(p=0.2) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + if self.use_dropout: + out = self.dropout(out) + out = F.avg_pool2d(out, 2, ceil_mode=True, exclusive=False) + return out + + +class DenseNet(nn.Layer): + def __init__(self, growthRate, reduction, bottleneck, use_dropout, + input_channel, **kwargs): + super(DenseNet, self).__init__() + ''' + ratio: 16 + growthRate: 24 + reduction: 0.5 + ''' + nDenseBlocks = 16 + nChannels = 2 * growthRate + + self.conv1 = nn.Conv2D( + input_channel, + nChannels, + kernel_size=7, + padding=3, + stride=2, + bias_attr=False) + self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, + bottleneck, use_dropout) + nChannels += nDenseBlocks * growthRate + out_channels = int(math.floor(nChannels * reduction)) + self.trans1 = Transition(nChannels, out_channels, use_dropout) + + nChannels = out_channels + self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, + bottleneck, use_dropout) + nChannels += nDenseBlocks * growthRate + out_channels = int(math.floor(nChannels * reduction)) + self.trans2 = Transition(nChannels, out_channels, use_dropout) + + nChannels = out_channels + self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, + bottleneck, use_dropout) + self.out_channels = out_channels + + def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck, + use_dropout): + layers = [] + for i in range(int(nDenseBlocks)): + if bottleneck: + layers.append(Bottleneck(nChannels, growthRate, use_dropout)) + else: + layers.append(SingleLayer(nChannels, growthRate, use_dropout)) + nChannels += growthRate + return nn.Sequential(*layers) + + def forward(self, inputs): + x, x_m, y = inputs + out = self.conv1(x) + out = F.relu(out) + out = F.max_pool2d(out, 2, ceil_mode=True) + out = self.dense1(out) + out = self.trans1(out) + out = self.dense2(out) + out = self.trans2(out) + out = self.dense3(out) + return out, x_m, y diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py index 63002140c..fdf5a8a96 100755 --- a/ppocr/modeling/heads/__init__.py +++ 
b/ppocr/modeling/heads/__init__.py
@@ -40,6 +40,7 @@ def build_head(config):
     from .rec_robustscanner_head import RobustScannerHead
     from .rec_visionlan_head import VLHead
     from .rec_rfl_head import RFLHead
+    from .rec_can_head import CANHead

     # cls head
     from .cls_head import ClsHead
@@ -56,7 +57,7 @@ def build_head(config):
         'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
         'MultiHead', 'ABINetHead', 'TableMasterHead', 'SPINAttentionHead',
         'VLHead', 'SLAHead', 'RobustScannerHead', 'CT_Head', 'RFLHead',
-        'DRRGHead'
+        'DRRGHead', 'CANHead'
     ]

     #table head
diff --git a/ppocr/modeling/heads/rec_can_head.py b/ppocr/modeling/heads/rec_can_head.py
new file mode 100644
index 000000000..afd78ee9d
--- /dev/null
+++ b/ppocr/modeling/heads/rec_can_head.py
@@ -0,0 +1,294 @@
+import math
+
+import paddle
+import paddle.nn as nn
+'''
+Counting Module
+'''
+
+
+class ChannelAtt(nn.Layer):
+    def __init__(self, channel, reduction):
+        super(ChannelAtt, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
+
+        self.fc = nn.Sequential(
+            nn.Linear(channel, channel // reduction),
+            nn.ReLU(), nn.Linear(channel // reduction, channel), nn.Sigmoid())
+
+    def forward(self, x):
+        b, c, _, _ = x.shape
+        y = paddle.reshape(self.avg_pool(x), [b, c])
+        y = paddle.reshape(self.fc(y), [b, c, 1, 1])
+        return x * y
+
+
+class CountingDecoder(nn.Layer):
+    def __init__(self, in_channel, out_channel, kernel_size):
+        super(CountingDecoder, self).__init__()
+        self.in_channel = in_channel
+        self.out_channel = out_channel
+
+        self.trans_layer = nn.Sequential(
+            nn.Conv2D(
+                self.in_channel,
+                512,
+                kernel_size=kernel_size,
+                padding=kernel_size // 2,
+                bias_attr=False),
+            nn.BatchNorm2D(512))
+
+        self.channel_att = ChannelAtt(512, 16)
+
+        self.pred_layer = nn.Sequential(
+            nn.Conv2D(
+                512, self.out_channel, kernel_size=1, bias_attr=False),
+            nn.Sigmoid())
+
+    def forward(self, x, mask):
+        b, _, h, w = x.shape
+        x = self.trans_layer(x)
+        x = self.channel_att(x)
+        x = self.pred_layer(x)
+
+        if mask is not None:
+            x = x * mask
+        x = paddle.reshape(x, [b, self.out_channel, -1])
+        x1 = paddle.sum(x, axis=-1)
+
+        return x1, paddle.reshape(x, [b, self.out_channel, h, w])
+
+
+'''
+Attention Decoder
+'''
+
+
+class PositionEmbeddingSine(nn.Layer):
+    def __init__(self,
+                 num_pos_feats=64,
+                 temperature=10000,
+                 normalize=False,
+                 scale=None):
+        super().__init__()
+        self.num_pos_feats = num_pos_feats
+        self.temperature = temperature
+        self.normalize = normalize
+        if scale is not None and normalize is False:
+            raise ValueError("normalize should be True if scale is passed")
+        if scale is None:
+            scale = 2 * math.pi
+        self.scale = scale
+
+    def forward(self, x, mask):
+        y_embed = paddle.cumsum(mask, 1, dtype='float32')
+        x_embed = paddle.cumsum(mask, 2, dtype='float32')
+
+        if self.normalize:
+            eps = 1e-6
+            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+        dim_t = paddle.arange(self.num_pos_feats, dtype='float32')
+        dim_d = paddle.expand(paddle.to_tensor(2), dim_t.shape)
+        dim_t = self.temperature**(2 * (dim_t / dim_d).astype('int64') /
+                                   self.num_pos_feats)
+
+        pos_x = paddle.unsqueeze(x_embed, [3]) / dim_t
+        pos_y = paddle.unsqueeze(y_embed, [3]) / dim_t
+
+        pos_x = paddle.flatten(
+            paddle.stack(
+                [
+                    paddle.sin(pos_x[:, :, :, 0::2]),
+                    paddle.cos(pos_x[:, :, :, 1::2])
+                ],
+                axis=4),
+            3)
+        pos_y = paddle.flatten(
+            paddle.stack(
+                [
+                    paddle.sin(pos_y[:, :, :, 0::2]),
+                    paddle.cos(pos_y[:, :, :, 1::2])
+                ],
+
axis=4), + 3) + + pos = paddle.transpose( + paddle.concat( + [pos_y, pos_x], axis=3), [0, 3, 1, 2]) + + return pos + + +class AttDecoder(nn.Layer): + def __init__(self, ratio, is_train, input_size, hidden_size, + encoder_out_channel, dropout, dropout_ratio, word_num, + counting_decoder_out_channel, attention): + super(AttDecoder, self).__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.out_channel = encoder_out_channel + self.attention_dim = attention['attention_dim'] + self.dropout_prob = dropout + self.ratio = ratio + self.word_num = word_num + + self.counting_num = counting_decoder_out_channel + self.is_train = is_train + + self.init_weight = nn.Linear(self.out_channel, self.hidden_size) + self.embedding = nn.Embedding(self.word_num, self.input_size) + self.word_input_gru = nn.GRUCell(self.input_size, self.hidden_size) + self.word_attention = Attention(hidden_size, attention['attention_dim']) + + self.encoder_feature_conv = nn.Conv2D( + self.out_channel, + self.attention_dim, + kernel_size=attention['word_conv_kernel'], + padding=attention['word_conv_kernel'] // 2) + + self.word_state_weight = nn.Linear(self.hidden_size, self.hidden_size) + self.word_embedding_weight = nn.Linear(self.input_size, + self.hidden_size) + self.word_context_weight = nn.Linear(self.out_channel, self.hidden_size) + self.counting_context_weight = nn.Linear(self.counting_num, + self.hidden_size) + self.word_convert = nn.Linear(self.hidden_size, self.word_num) + + if dropout: + self.dropout = nn.Dropout(dropout_ratio) + + def forward(self, cnn_features, labels, counting_preds, images_mask): + if self.is_train: + _, num_steps = labels.shape + else: + num_steps = 36 + + batch_size, _, height, width = cnn_features.shape + images_mask = images_mask[:, :, ::self.ratio, ::self.ratio] + + word_probs = paddle.zeros((batch_size, num_steps, self.word_num)) + word_alpha_sum = paddle.zeros((batch_size, 1, height, width)) + + hidden = self.init_hidden(cnn_features, images_mask) + counting_context_weighted = self.counting_context_weight(counting_preds) + cnn_features_trans = self.encoder_feature_conv(cnn_features) + + position_embedding = PositionEmbeddingSine(256, normalize=True) + pos = position_embedding(cnn_features_trans, images_mask[:, 0, :, :]) + + cnn_features_trans = cnn_features_trans + pos + + word = paddle.ones([batch_size, 1], dtype='int64') # init word as sos + word = word.squeeze(axis=1) + for i in range(num_steps): + word_embedding = self.embedding(word) + _, hidden = self.word_input_gru(word_embedding, hidden) + word_context_vec, _, word_alpha_sum = self.word_attention( + cnn_features, cnn_features_trans, hidden, word_alpha_sum, + images_mask) + + current_state = self.word_state_weight(hidden) + word_weighted_embedding = self.word_embedding_weight(word_embedding) + word_context_weighted = self.word_context_weight(word_context_vec) + + if self.dropout_prob: + word_out_state = self.dropout( + current_state + word_weighted_embedding + + word_context_weighted + counting_context_weighted) + else: + word_out_state = current_state + word_weighted_embedding + word_context_weighted + counting_context_weighted + + word_prob = self.word_convert(word_out_state) + word_probs[:, i] = word_prob + + if self.is_train: + word = labels[:, i] + else: + word = word_prob.argmax(1) + word = paddle.multiply( + word, labels[:, i] + ) # labels are oneslike tensor in infer/predict mode + + return word_probs + + def init_hidden(self, features, feature_mask): + average = paddle.sum(paddle.sum(features * 
feature_mask, axis=-1),
+                           axis=-1) / paddle.sum(
+                               (paddle.sum(feature_mask, axis=-1)), axis=-1)
+        average = self.init_weight(average)
+        return paddle.tanh(average)
+
+
+'''
+Attention Module
+'''
+
+
+class Attention(nn.Layer):
+    def __init__(self, hidden_size, attention_dim):
+        super(Attention, self).__init__()
+        self.hidden = hidden_size
+        self.attention_dim = attention_dim
+        self.hidden_weight = nn.Linear(self.hidden, self.attention_dim)
+        self.attention_conv = nn.Conv2D(
+            1, 512, kernel_size=11, padding=5, bias_attr=False)
+        self.attention_weight = nn.Linear(
+            512, self.attention_dim, bias_attr=False)
+        self.alpha_convert = nn.Linear(self.attention_dim, 1)
+
+    def forward(self,
+                cnn_features,
+                cnn_features_trans,
+                hidden,
+                alpha_sum,
+                image_mask=None):
+        query = self.hidden_weight(hidden)
+        alpha_sum_trans = self.attention_conv(alpha_sum)
+        coverage_alpha = self.attention_weight(
+            paddle.transpose(alpha_sum_trans, [0, 2, 3, 1]))
+        alpha_score = paddle.tanh(
+            paddle.unsqueeze(query, [1, 2]) + coverage_alpha + paddle.transpose(
+                cnn_features_trans, [0, 2, 3, 1]))
+        energy = self.alpha_convert(alpha_score)
+        energy = energy - energy.max()
+        energy_exp = paddle.exp(paddle.squeeze(energy, -1))
+
+        if image_mask is not None:
+            energy_exp = energy_exp * paddle.squeeze(image_mask, 1)
+        alpha = energy_exp / (paddle.unsqueeze(
+            paddle.sum(paddle.sum(energy_exp, -1), -1), [1, 2]) + 1e-10)
+        alpha_sum = paddle.unsqueeze(alpha, 1) + alpha_sum
+        context_vector = paddle.sum(
+            paddle.sum((paddle.unsqueeze(alpha, 1) * cnn_features), -1), -1)
+
+        return context_vector, alpha, alpha_sum
+
+
+class CANHead(nn.Layer):
+    def __init__(self, in_channel, out_channel, ratio, attdecoder, **kwargs):
+        super(CANHead, self).__init__()
+
+        self.in_channel = in_channel
+        self.out_channel = out_channel
+
+        # mscm: multi-scale counting module, one branch with a 3x3 kernel
+        # and one with a 5x5 kernel
+        self.counting_decoder1 = CountingDecoder(self.in_channel,
+                                                 self.out_channel, 3)
+        self.counting_decoder2 = CountingDecoder(self.in_channel,
+                                                 self.out_channel, 5)
+
+        self.decoder = AttDecoder(ratio, **attdecoder)
+
+        self.ratio = ratio
+
+    def forward(self, inputs, targets=None):
+        cnn_features, images_mask, labels = inputs
+
+        counting_mask = images_mask[:, :, ::self.ratio, ::self.ratio]
+        counting_preds1, _ = self.counting_decoder1(cnn_features, counting_mask)
+        counting_preds2, _ = self.counting_decoder2(cnn_features, counting_mask)
+        counting_preds = (counting_preds1 + counting_preds2) / 2
+
+        word_probs = self.decoder(cnn_features, labels, counting_preds,
+                                  images_mask)
+        return word_probs, counting_preds, counting_preds1, counting_preds2
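A rough shape check for the head defined above (a sketch only: the 8x16 feature map and mask sizes are invented, while the hyperparameters mirror the CANHead section of the `rec_d28_can.yml` added later in this patch):

```python
import paddle

att = {
    'is_train': False, 'input_size': 256, 'hidden_size': 256,
    'encoder_out_channel': 684, 'dropout': True, 'dropout_ratio': 0.5,
    'word_num': 111, 'counting_decoder_out_channel': 111,
    'attention': {'attention_dim': 512, 'word_conv_kernel': 1},
}
head = CANHead(in_channel=684, out_channel=111, ratio=16, attdecoder=att)
feat = paddle.randn([1, 684, 8, 16])          # backbone feature map
mask = paddle.ones([1, 1, 128, 256])          # image mask at input resolution
labels = paddle.ones([1, 36], dtype='int64')  # dummy labels for inference mode
word_probs, counting, c1, c2 = head([feat, mask, labels])
print(word_probs.shape)  # [1, 36, 111]: one symbol distribution per decode step
print(counting.shape)    # [1, 111]: averaged counts from the two counting branches
```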
diff --git a/ppocr/optimizer/learning_rate.py b/ppocr/optimizer/learning_rate.py
index 7d45109b4..be52a9184 100644
--- a/ppocr/optimizer/learning_rate.py
+++ b/ppocr/optimizer/learning_rate.py
@@ -18,7 +18,7 @@ from __future__ import print_function
 from __future__ import unicode_literals

 from paddle.optimizer import lr
-from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay
+from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay


 class Linear(object):
@@ -386,3 +386,44 @@ class MultiStepDecay(object):
             end_lr=self.learning_rate,
             last_epoch=self.last_epoch)
         return learning_rate
+
+
+class TwoStepCosine(object):
+    """
+    Two-step cosine learning rate decay: cosine annealing with period
+    T_max1 (200 epochs) in the first stage, then with period T_max2
+    (the total number of training epochs)
+    Args:
+        lr(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training.
+            Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_each_epoch,
+                 epochs,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(TwoStepCosine, self).__init__()
+        self.learning_rate = learning_rate
+        self.T_max1 = step_each_epoch * 200
+        self.T_max2 = step_each_epoch * epochs
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = TwoStepCosineDecay(
+            learning_rate=self.learning_rate,
+            T_max1=self.T_max1,
+            T_max2=self.T_max2,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
diff --git a/ppocr/optimizer/lr_scheduler.py b/ppocr/optimizer/lr_scheduler.py
index f62f1f3b0..cd09367e2 100644
--- a/ppocr/optimizer/lr_scheduler.py
+++ b/ppocr/optimizer/lr_scheduler.py
@@ -160,3 +160,63 @@ class OneCycleDecay(LRScheduler):
             start_step = phase['end_step']

         return computed_lr
+
+
+class TwoStepCosineDecay(LRScheduler):
+    def __init__(self,
+                 learning_rate,
+                 T_max1,
+                 T_max2,
+                 eta_min=0,
+                 last_epoch=-1,
+                 verbose=False):
+        if not isinstance(T_max1, int):
+            raise TypeError(
+                "The type of 'T_max1' in 'TwoStepCosineDecay' must be 'int', but received %s."
+                % type(T_max1))
+        if not isinstance(T_max2, int):
+            raise TypeError(
+                "The type of 'T_max2' in 'TwoStepCosineDecay' must be 'int', but received %s."
+                % type(T_max2))
+        if not isinstance(eta_min, (float, int)):
+            raise TypeError(
+                "The type of 'eta_min' in 'TwoStepCosineDecay' must be 'float, int', but received %s."
+                % type(eta_min))
+        assert T_max1 > 0 and isinstance(
+            T_max1, int), " 'T_max1' must be a positive integer."
+        assert T_max2 > 0 and isinstance(
+            T_max2, int), " 'T_max2' must be a positive integer."
+ self.T_max1 = T_max1 + self.T_max2 = T_max2 + self.eta_min = float(eta_min) + super(TwoStepCosineDecay, self).__init__(learning_rate, last_epoch, + verbose) + + def get_lr(self): + + if self.last_epoch <= self.T_max1: + if self.last_epoch == 0: + return self.base_lr + elif (self.last_epoch - 1 - self.T_max1) % (2 * self.T_max1) == 0: + return self.last_lr + (self.base_lr - self.eta_min) * ( + 1 - math.cos(math.pi / self.T_max1)) / 2 + + return (1 + math.cos(math.pi * self.last_epoch / self.T_max1)) / ( + 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max1)) * ( + self.last_lr - self.eta_min) + self.eta_min + else: + if (self.last_epoch - 1 - self.T_max2) % (2 * self.T_max2) == 0: + return self.last_lr + (self.base_lr - self.eta_min) * ( + 1 - math.cos(math.pi / self.T_max2)) / 2 + + return (1 + math.cos(math.pi * self.last_epoch / self.T_max2)) / ( + 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max2)) * ( + self.last_lr - self.eta_min) + self.eta_min + + def _get_closed_form_lr(self): + if self.last_epoch <= self.T_max1: + return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos( + math.pi * self.last_epoch / self.T_max1)) / 2 + else: + return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos( + math.pi * self.last_epoch / self.T_max2)) / 2 diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index 3a09030b2..e86a7ea70 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -37,6 +37,7 @@ from .table_postprocess import TableMasterLabelDecode, TableLabelDecode from .picodet_postprocess import PicoDetPostProcess from .ct_postprocess import CTPostProcess from .drrg_postprocess import DRRGPostprocess +from .rec_postprocess import SeqLabelDecode def build_post_process(config, global_config=None): @@ -51,7 +52,7 @@ def build_post_process(config, global_config=None): 'TableMasterLabelDecode', 'SPINLabelDecode', 'DistillationSerPostProcess', 'DistillationRePostProcess', 'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess', - 'RFLLabelDecode', 'DRRGPostprocess' + 'RFLLabelDecode', 'DRRGPostprocess', 'SeqLabelDecode' ] if config['name'] == 'PSEPostProcess': diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 59b5254e4..4d88c278e 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -897,3 +897,36 @@ class VLLabelDecode(BaseRecLabelDecode): return text label = self.decode(label) return text, label + + +class SeqLabelDecode(BaseRecLabelDecode): + """ Convert between latex-symbol and symbol-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(SeqLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def decode(self, text_index, preds_prob=None): + result_list = [] + batch_size = len(text_index) + for batch_idx in range(batch_size): + seq_end = text_index[batch_idx].argmin(0) + idx_list = text_index[batch_idx][:seq_end].tolist() + symbol_list = [self.character[idx] for idx in idx_list] + probs = [] + if preds_prob is not None: + probs = preds_prob[batch_idx][:len(symbol_list)].tolist() + + result_list.append([' '.join(symbol_list), probs]) + return result_list + + def __call__(self, preds, label=None, *args, **kwargs): + pred_prob, _, _, _ = preds + preds_idx = pred_prob.argmax(axis=2) + + text = self.decode(preds_idx) + if label is None: + return text + label = self.decode(label) + return text, label diff --git a/ppocr/utils/dict/latex_symbol_dict.txt 
b/ppocr/utils/dict/latex_symbol_dict.txt
new file mode 100644
index 000000000..b43f1fa8b
--- /dev/null
+++ b/ppocr/utils/dict/latex_symbol_dict.txt
@@ -0,0 +1,111 @@
+eos
+sos
+!
+'
+(
+)
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+<
+=
+>
+A
+B
+C
+E
+F
+G
+H
+I
+L
+M
+N
+P
+R
+S
+T
+V
+X
+Y
+[
+\Delta
+\alpha
+\beta
+\cdot
+\cdots
+\cos
+\div
+\exists
+\forall
+\frac
+\gamma
+\geq
+\in
+\infty
+\int
+\lambda
+\ldots
+\leq
+\lim
+\log
+\mu
+\neq
+\phi
+\pi
+\pm
+\prime
+\rightarrow
+\sigma
+\sin
+\sqrt
+\sum
+\tan
+\theta
+\times
+]
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+\{
+|
+\}
+{
+}
+^
+_
\ No newline at end of file
diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
new file mode 100644
index 000000000..aeaccb6b0
--- /dev/null
+++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml
@@ -0,0 +1,114 @@
+Global:
+  use_gpu: True
+  epoch_num: 240
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/can/
+  save_epoch_step: 1
+  # evaluation is run every 1105 iterations
+  eval_batch_step: [0, 1105]
+  cal_metric_during_train: True
+  pretrained_model: ./output/rec/can/CAN
+  checkpoints: ./output/rec/can/CAN
+  save_inference_dir: ./inference/rec_d28_can/
+  use_visualdl: False
+  infer_img: doc/imgs_hme/hme_01.jpg
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+  max_text_length: 36
+  infer_mode: False
+  use_space_char: False
+  save_res_path: ./output/rec/predicts_can.txt
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  clip_norm_global: 100.0
+  lr:
+    name: TwoStepCosine
+    learning_rate: 0.01
+    warmup_epoch: 1
+  weight_decay: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: CAN
+  in_channels: 1
+  Transform:
+  Backbone:
+    name: DenseNet
+    growthRate: 24
+    reduction: 0.5
+    bottleneck: True
+    use_dropout: True
+    input_channel: 1
+
+  Head:
+    name: CANHead
+    in_channel: 684
+    out_channel: 111
+    max_text_length: 36
+    ratio: 16
+    attdecoder:
+      is_train: True
+      input_size: 256
+      hidden_size: 256
+      encoder_out_channel: 684
+      dropout: True
+      dropout_ratio: 0.5
+      word_num: 111
+      counting_decoder_out_channel: 111
+      attention:
+        attention_dim: 512
+        word_conv_kernel: 1
+
+Loss:
+  name: CANLoss
+
+PostProcess:
+  name: SeqLabelDecode
+  character: 111
+
+Metric:
+  name: CANMetric
+  main_indicator: exp_rate
+
+Train:
+  dataset:
+    name: HMERDataSet
+    data_dir: ./train_data/CROHME/training/images/
+    transforms:
+      - DecodeImage:
+          channel_first: False
+      - GrayImageChannelFormat:
+          normalize: True
+          inverse: True
+      - KeepKeys:
+          keep_keys: ['image', 'label']
+    label_file_list: ["./train_data/CROHME/training/labels.json"]
+  loader:
+    shuffle: True
+    batch_size_per_card: 2
+    drop_last: True
+    num_workers: 1
+    collate_fn: DyMaskCollator
+
+Eval:
+  dataset:
+    name: HMERDataSet
+    data_dir: ./train_data/CROHME/evaluation/images/
+    transforms:
+      - DecodeImage:
+          channel_first: False
+      - GrayImageChannelFormat:
+          normalize: True
+          inverse: True
+      - KeepKeys:
+          keep_keys: ['image', 'label']
+    label_file_list: ["./train_data/CROHME/evaluation/labels.json"]
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1
+    num_workers: 4
+    collate_fn: DyMaskCollator
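A note on the magic number 111 that recurs through this patch (`out_channel`, `word_num`, `character: 111`): it is the size of the `latex_symbol_dict.txt` introduced above, and the indices skipped by `gen_counting_label` are exactly the sequence markers and structural tokens. A quick way to verify (assumes the file path above):

```python
with open('ppocr/utils/dict/latex_symbol_dict.txt') as f:
    symbols = [line.rstrip('\n') for line in f]
print(len(symbols))  # 111
print([symbols[i] for i in (0, 1, 107, 108, 109, 110)])
# ['eos', 'sos', '{', '}', '^', '_'] -- markers and layout tokens,
# not countable symbols
```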
diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt
new file mode 100644
index 000000000..be50c5980
--- /dev/null
+++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:rec_d28_can
+python:python
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=8
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./doc/imgs_hme
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/rec_d28_can_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o
+infer_quant:False
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict/latex_symbol_dict.txt --rec_image_shape="1,100,100" --rec_algorithm="CAN"
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--rec_model_dir:./output/
+--image_dir:./doc/imgs_hme
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[1,100,100]}]
diff --git a/tools/eval.py b/tools/eval.py
index 3d1d3813d..21f4d94d5 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -74,7 +74,9 @@ def main():
             config['Architecture']["Head"]['out_channels'] = char_num

     model = build_model(config['Architecture'])
-    extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR", "VisionLAN", "RobustScanner"]
+    extra_input_models = [
+        "SRN", "NRTR", "SAR", "SEED", "SVTR", "VisionLAN", "RobustScanner"
+    ]
     extra_input = False
     if config['Architecture']['algorithm'] == 'Distillation':
         for key in config['Architecture']["Models"]:
@@ -83,7 +85,10 @@ def main():
     else:
         extra_input = config['Architecture']['algorithm'] in extra_input_models
     if "model_type" in config['Architecture'].keys():
-        model_type = config['Architecture']['model_type']
+        if config['Architecture']['algorithm'] == 'CAN':
+            model_type = 'can'
+        else:
+            model_type = config['Architecture']['model_type']
     else:
         model_type = None

@@ -92,7 +97,7 @@ def main():
     # amp
     use_amp = config["Global"].get("use_amp", False)
     amp_level = config["Global"].get("amp_level", 'O2')
-    amp_custom_black_list = config['Global'].get('amp_custom_black_list',[])
+    amp_custom_black_list = config['Global'].get('amp_custom_black_list', [])
     if use_amp:
         AMP_RELATED_FLAGS_SETTING = {
             'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
@@ -120,7 +125,8 @@ def main():

     # start eval
     metric = program.eval(model, valid_dataloader, post_process_class,
-                          eval_class, model_type, extra_input, scaler, amp_level, amp_custom_black_list)
+                          eval_class, model_type, extra_input, scaler,
+                          amp_level, amp_custom_black_list)
     logger.info('metric eval ***************')
     for k, v in metric.items():
         logger.info('{}:{}'.format(k, v))
diff --git a/tools/export_model.py b/tools/export_model.py
index 52f05bfcb..4b90fcae4 100755
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -123,6 +123,17 @@ def export_single_model(model,
             ]
         ]
         model = to_static(model, input_spec=other_shape)
+    elif arch_config["algorithm"] == "CAN":
+        other_shape = [[
+            paddle.static.InputSpec(
+                shape=[None, 1, None, None],
+                dtype="float32"), paddle.static.InputSpec(
+                    shape=[None, 1, None, None], dtype="float32"),
+            paddle.static.InputSpec(
+                shape=[None, arch_config['Head']['max_text_length']],
+                dtype="int64")
+        ]]
+        model = to_static(model, input_spec=other_shape)
     elif arch_config["algorithm"] in ["LayoutLM", "LayoutLMv2", "LayoutXLM"]:
         input_spec = [
             paddle.static.InputSpec(
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index bffeb2553..c1604798e 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -108,6 +108,13 @@ class TextRecognizer(object):
             }
         elif self.rec_algorithm == "PREN":
             postprocess_params = {'name': 'PRENLabelDecode'}
+        elif self.rec_algorithm == "CAN":
+            self.inverse = args.rec_image_inverse
+            postprocess_params = {
+                'name': 'SeqLabelDecode',
+                "character_dict_path": args.rec_char_dict_path,
+                "use_space_char": args.use_space_char
+            }
         self.postprocess_op = build_post_process(postprocess_params)
         self.predictor, self.input_tensor, self.output_tensors, self.config = \
             utility.create_predictor(args, 'rec', logger)
@@ -351,6 +358,30 @@ class TextRecognizer(object):

         return resized_image

+    def norm_img_can(self, img, image_shape):
+
+        img = cv2.cvtColor(
+            img, cv2.COLOR_BGR2GRAY)  # CAN only predicts gray-scale images
+
+        if self.inverse:
+            img = 255 - img
+
+        if self.rec_image_shape[0] == 1:
+            h, w = img.shape
+            _, imgH, imgW = self.rec_image_shape
+            if h < imgH or w < imgW:
+                # rec_image_shape acts as a lower bound: pad small inputs
+                # with white, never resize
+                padding_h = max(imgH - h, 0)
+                padding_w = max(imgW - w, 0)
+                img_padded = np.pad(img, ((0, padding_h), (0, padding_w)),
+                                    'constant',
+                                    constant_values=(255))
+                img = img_padded
+
+        img = np.expand_dims(img, 0) / 255.0  # (h, w) -> (1, h, w), in [0, 1]
+        img = img.astype('float32')
+
+        return img
+
     def __call__(self, img_list):
         img_num = len(img_list)
         # Calculate the aspect ratio of all text bars
@@ -430,6 +461,17 @@ class TextRecognizer(object):
                     word_positions = np.array(range(0, 40)).astype('int64')
                     word_positions = np.expand_dims(word_positions, axis=0)
                     word_positions_list.append(word_positions)
+                elif self.rec_algorithm == "CAN":
+                    norm_img = self.norm_img_can(img_list[indices[ino]],
+                                                 max_wh_ratio)
+                    norm_img = norm_img[np.newaxis, :]
+                    norm_img_batch.append(norm_img)
+                    norm_image_mask = np.ones(norm_img.shape, dtype='float32')
+                    word_label = np.ones([1, 36], dtype='int64')
+                    norm_img_mask_batch = []
+                    word_label_list = []
+                    norm_img_mask_batch.append(norm_image_mask)
+                    word_label_list.append(word_label)
                 else:
                     norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                     max_wh_ratio)
@@ -527,6 +569,33 @@ class TextRecognizer(object):
                 if self.benchmark:
                     self.autolog.times.stamp()
                 preds = outputs[0]
+            elif self.rec_algorithm == "CAN":
+                norm_img_mask_batch = np.concatenate(norm_img_mask_batch)
+                word_label_list = np.concatenate(word_label_list)
+                inputs = [norm_img_batch, norm_img_mask_batch, word_label_list]
+                if self.use_onnx:
+                    input_dict = {}
+                    input_dict[self.input_tensor.name] = norm_img_batch
+                    outputs = self.predictor.run(self.output_tensors,
+                                                 input_dict)
+                    preds = outputs
+                else:
+                    input_names = self.predictor.get_input_names()
+                    input_tensor = []
+                    for i in range(len(input_names)):
+                        input_tensor_i = self.predictor.get_input_handle(
+                            input_names[i])
+                        input_tensor_i.copy_from_cpu(inputs[i])
+
input_tensor.append(input_tensor_i) + self.input_tensor = input_tensor + self.predictor.run() + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + if self.benchmark: + self.autolog.times.stamp() + preds = outputs else: if self.use_onnx: input_dict = {} diff --git a/tools/infer/utility.py b/tools/infer/utility.py index f6a44e35a..34cad2590 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -84,6 +84,7 @@ def init_args(): # params for text recognizer parser.add_argument("--rec_algorithm", type=str, default='SVTR_LCNet') parser.add_argument("--rec_model_dir", type=str) + parser.add_argument("--rec_image_inverse", type=str2bool, default=True) parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320") parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--max_text_length", type=int, default=25) diff --git a/tools/infer_rec.py b/tools/infer_rec.py index cb8a6ec30..29aab9b57 100755 --- a/tools/infer_rec.py +++ b/tools/infer_rec.py @@ -141,6 +141,11 @@ def main(): paddle.to_tensor(valid_ratio), paddle.to_tensor(word_positons), ] + if config['Architecture']['algorithm'] == "CAN": + image_mask = paddle.ones( + (np.expand_dims( + batch[0], axis=0).shape), dtype='float32') + label = paddle.ones((1, 36), dtype='int64') images = np.expand_dims(batch[0], axis=0) images = paddle.to_tensor(images) if config['Architecture']['algorithm'] == "SRN": @@ -149,6 +154,8 @@ def main(): preds = model(images, img_metas) elif config['Architecture']['algorithm'] == "RobustScanner": preds = model(images, img_metas) + elif config['Architecture']['algorithm'] == "CAN": + preds = model([images, image_mask, label]) else: preds = model(images) post_result = post_process_class(preds) diff --git a/tools/program.py b/tools/program.py index 5d2bd5bfb..c491247a6 100755 --- a/tools/program.py +++ b/tools/program.py @@ -273,6 +273,8 @@ def train(config, preds = model(images, data=batch[1:]) elif model_type in ["kie"]: preds = model(batch) + elif algorithm in ['CAN']: + preds = model(batch[:3]) else: preds = model(images) preds = to_float32(preds) @@ -286,6 +288,8 @@ def train(config, preds = model(images, data=batch[1:]) elif model_type in ["kie", 'sr']: preds = model(batch) + elif algorithm in ['CAN']: + preds = model(batch[:3]) else: preds = model(images) loss = loss_class(preds, batch) @@ -302,6 +306,9 @@ def train(config, elif model_type in ['table']: post_result = post_process_class(preds, batch) eval_class(post_result, batch) + elif algorithm in ['CAN']: + model_type = 'can' + eval_class(preds[0], batch[2:], epoch_reset=(idx == 0)) else: if config['Loss']['name'] in ['MultiLoss', 'MultiLoss_v2' ]: # for multi head loss @@ -496,6 +503,8 @@ def eval(model, preds = model(images, data=batch[1:]) elif model_type in ["kie"]: preds = model(batch) + elif model_type in ['can']: + preds = model(batch[:3]) elif model_type in ['sr']: preds = model(batch) sr_img = preds["sr_img"] @@ -508,6 +517,8 @@ def eval(model, preds = model(images, data=batch[1:]) elif model_type in ["kie"]: preds = model(batch) + elif model_type in ['can']: + preds = model(batch[:3]) elif model_type in ['sr']: preds = model(batch) sr_img = preds["sr_img"] @@ -532,6 +543,8 @@ def eval(model, eval_class(post_result, batch_numpy) elif model_type in ['sr']: eval_class(preds, batch_numpy) + elif model_type in ['can']: + eval_class(preds[0], batch_numpy[2:], epoch_reset=False) else: post_result = post_process_class(preds, batch_numpy[1]) 
                eval_class(post_result, batch_numpy)
@@ -629,7 +642,7 @@ def preprocess(is_train=False):
         'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
         'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE',
         'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN',
-        'Gestalt', 'SLANet', 'RobustScanner', 'CT', 'RFL', 'DRRG'
+        'Gestalt', 'SLANet', 'RobustScanner', 'CT', 'RFL', 'DRRG', 'CAN'
     ]

     if use_xpu:

From 25e56a6f447115737a135a2813225ef50038276a Mon Sep 17 00:00:00 2001
From: dorren
Date: Sat, 15 Oct 2022 20:45:43 +0800
Subject: [PATCH 05/20] add handwritten mathematical expression recognition algorithm, co-writers Lllllolita and yeyanli

---
 doc/doc_en/algorithm_rec_can_en.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/doc_en/algorithm_rec_can_en.md b/doc/doc_en/algorithm_rec_can_en.md
index f2bc645af..4d7a64f99 100644
--- a/doc/doc_en/algorithm_rec_can_en.md
+++ b/doc/doc_en/algorithm_rec_can_en.md
@@ -1,4 +1,4 @@
-# RobustScanner
+# CAN

 - [1. Introduction](#1)
 - [2. Environment](#2)
@@ -77,7 +77,7 @@ python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_infe

 For CAN text recognition model inference, the following commands can be executed:

 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 132, 519" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 100, 100" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
 ```
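A side note on the `--rec_image_shape` change in the commit above: `norm_img_can` (added earlier in this series) never resizes, it only pads, so the shape behaves as a lower bound rather than a fixed input size. A minimal sketch of that rule:

```python
import numpy as np

imgH, imgW = 100, 100                      # from --rec_image_shape="1, 100, 100"
img = np.zeros((60, 240), dtype=np.uint8)  # height below the floor, width above it
pad_h, pad_w = max(imgH - 60, 0), max(imgW - 240, 0)
padded = np.pad(img, ((0, pad_h), (0, pad_w)), 'constant', constant_values=255)
print(padded.shape)  # (100, 240): only the short side is padded with white
```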
From c57effb84f53d9416482e6fa15d0e2307a62ac3d Mon Sep 17 00:00:00 2001
From: dorren
Date: Mon, 17 Oct 2022 15:04:42 +0800
Subject: [PATCH 06/20] update can data loading method and tipc configs, revert precommit config

---
 .pre-commit-config.yaml                       |   7 +-
 configs/rec/rec_d28_can.yml                   |  30 +++---
 .../crohme_demo}/hme_00.jpg                   | Bin
 .../crohme_demo}/hme_01.jpg                   | Bin
 .../crohme_demo}/hme_02.jpg                   | Bin
 doc/doc_ch/algorithm_rec_can.md               |  32 +++---
 doc/doc_en/algorithm_rec_can_en.md            |  16 +--
 ppocr/data/__init__.py                        |   3 +-
 ppocr/data/collate_fn.py                      |   6 +-
 ppocr/data/hmer_dataset.py                    |  99 ------------------
 ppocr/data/imaug/label_ops.py                 |  31 +++++-
 test_tipc/configs/rec_d28_can/rec_d28_can.yml |  34 +++---
 .../rec_d28_can/train_infer_python.txt        |  10 +-
 test_tipc/prepare.sh                          |   7 ++
 test_tipc/readme.md                           |   1 +
 tools/program.py                              |   2 +-
 16 files changed, 117 insertions(+), 161 deletions(-)
 rename doc/{imgs_hme => datasets/crohme_demo}/hme_00.jpg (100%)
 rename doc/{imgs_hme => datasets/crohme_demo}/hme_01.jpg (100%)
 rename doc/{imgs_hme => datasets/crohme_demo}/hme_02.jpg (100%)
 delete mode 100644 ppocr/data/hmer_dataset.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b6a299ba4..1584bc76a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,10 @@
-repos:
 -   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
-    rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+    sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
     hooks:
     -   id: yapf
         files: \.py$
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: a11d9314b22d8f8c7556443875b731ef05965464
+    sha: a11d9314b22d8f8c7556443875b731ef05965464
     hooks:
     -   id: check-merge-conflict
     -   id: check-symlinks
     -   id: detect-private-key
         files: (?!.*paddle)^.*$
     -   id: end-of-file-fixer
         files: \.md$
     -   id: trailing-whitespace
         files: \.md$
 -   repo: https://github.com/Lucas-C/pre-commit-hooks
-    rev: v1.0.1
+    sha: v1.0.1
     hooks:
     -   id: forbid-crlf
         files: \.md$
diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml
index aeaccb6b0..9fe936ae1 100644
--- a/configs/rec/rec_d28_can.yml
+++ b/configs/rec/rec_d28_can.yml
@@ -5,14 +5,14 @@ Global:
   print_batch_step: 10
   save_model_dir: ./output/rec/can/
   save_epoch_step: 1
-  # evaluation is run every 1105 iterations
+  # evaluation is run every 1105 iterations (1 epoch)(batch_size = 8)
   eval_batch_step: [0, 1105]
   cal_metric_during_train: True
-  pretrained_model: ./output/rec/can/CAN
-  checkpoints: ./output/rec/can/CAN
-  save_inference_dir: ./inference/rec_d28_can/
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
   use_visualdl: False
-  infer_img: doc/imgs_hme/hme_01.jpeg
+  infer_img: doc/datasets/crohme_demo/hme_00.jpg
   # for data or label process
   character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
   max_text_length: 36
@@ -75,7 +75,7 @@ Metric:

 Train:
   dataset:
-    name: HMERDataSet
+    name: PGDataSet
     data_dir: ./train_data/CROHME/training/images/
     transforms:
       - DecodeImage:
@@ -83,19 +83,22 @@ Train:
       - GrayImageChannelFormat:
          normalize: True
          inverse: True
+      - SeqLabelEncode:
+          character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+          lower: False
       - KeepKeys:
          keep_keys: ['image', 'label']
-    label_file_list: ["./train_data/CROHME/training/labels.json"]
+    label_file_list: ["./train_data/CROHME/training/labels.txt"]
   loader:
     shuffle: True
-    batch_size_per_card: 2
-    drop_last: True
-    num_workers: 1
+    batch_size_per_card: 8
+    drop_last: False
+    num_workers: 4
     collate_fn: DyMaskCollator

 Eval:
   dataset:
-    name: HMERDataSet
+    name: PGDataSet
     data_dir: ./train_data/CROHME/evaluation/images/
     transforms:
       - DecodeImage:
@@ -103,9 +106,12 @@ Eval:
       - GrayImageChannelFormat:
          normalize: True
          inverse: True
+      - SeqLabelEncode:
+          character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
+          lower: False
       - KeepKeys:
          keep_keys: ['image', 'label']
-    label_file_list: ["./train_data/CROHME/evaluation/labels.json"]
+    label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
   loader:
     shuffle: False
     drop_last: False
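This commit replaces the JSON-backed HMERDataSet with the generic PGDataSet plus a SeqLabelEncode transform (added in ppocr/data/imaug/label_ops.py, not shown here), so the CROHME labels move to PaddleOCR's usual tab-separated text format. A hypothetical `labels.txt` line and how it would be split:

```python
# hypothetical label line: <image name>\t<space-separated LaTeX symbols>
line = "hme_00\t\\frac { x } { y } + 1"
name, label = line.strip().split('\t')
print(name)           # hme_00
print(label.split())  # ['\\frac', '{', 'x', '}', '{', 'y', '}', '+', '1']
```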
环境配置 @@ -60,16 +60,21 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs python3 tools/train.py -c configs/rec/rec_d28_can.yml -o Train.dataset.transforms.GrayImageChannelFormat.inverse=False ``` +- 默认每训练1个epoch(1105次iteration)进行1次评估,若您更改训练的batch_size,或更换数据集,请在训练时作出如下修改 +``` +python3 tools/train.py -c configs/rec/rec_d28_can.yml +-o Global.eval_batch_step=[0, {length_of_dataset//batch_size}] +``` # ### 3.2 评估 -可下载已训练完成的[模型文件](#model),使用如下命令进行评估: +可下载已训练完成的[模型文件](https://paddleocr.bj.bcebos.com/contribution/can_train.tar),使用如下命令进行评估: ```shell -# 注意将pretrained_model的路径设置为本地路径。 -python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy +# 注意将pretrained_model的路径设置为本地路径。若使用自行训练保存的模型,请注意修改路径和文件名为{path/to/weights}/{model_name}。 +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN ``` @@ -78,9 +83,9 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec 使用如下命令进行单张图片预测: ```shell # 注意将pretrained_model的路径设置为本地路径。 -python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy +python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/datasets/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN -# 预测文件夹下所有图像时,可修改infer_img为文件夹,如 Global.infer_img='./doc/imgs_hme/'。 +# 预测文件夹下所有图像时,可修改infer_img为文件夹,如 Global.infer_img='./doc/datasets/crohme_demo/'。 ``` @@ -89,17 +94,16 @@ python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.a ### 4.1 Python推理 -首先将训练得到best模型,转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/rec_d28_can_train.tar) ),可以使用如下命令进行转换: +首先将训练得到best模型,转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/contribution/can_train.tar) ),可以使用如下命令进行转换: ```shell # 注意将pretrained_model的路径设置为本地路径。 -python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False +python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False # 目前的静态图模型默认的输出长度最大为36,如果您需要预测更长的序列,请在导出模型时指定其输出序列为合适的值,例如 Architecture.Head.max_text_length=72 ``` **注意:** - 如果您是在自己的数据集上训练的模型,并且调整了字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。 -- 如果您修改了训练时的输入大小,请修改`tools/export_model.py`文件中的对应ABINet的`infer_shape`。 转换成功后,在目录下有三个文件: ``` @@ -112,18 +116,18 @@ python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_infe 执行如下命令进行模型推理: ```shell -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt" +python3 tools/infer/predict_rec.py --image_dir="./doc/datasets/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt" -# 预测文件夹下所有图像时,可修改image_dir为文件夹,如 --image_dir='./doc/imgs_hme/'。 +# 预测文件夹下所有图像时,可修改image_dir为文件夹,如 --image_dir='./doc/datasets/crohme_demo/'。 # 如果您需要在白底黑字的图片上进行预测,请设置 
--rec_image_inverse=False ``` -![测试图片样例](../imgs_hme/hme_00.jpg) +![测试图片样例](../datasets/crohme_demo/hme_00.jpg) 执行命令后,上面图像的预测结果(识别的文本)会打印到屏幕上,示例如下: ```shell -Predicts of ./doc/imgs_hme/hme_03.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []] +Predicts of ./doc/imgs_hme/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []] ``` diff --git a/doc/doc_en/algorithm_rec_can_en.md b/doc/doc_en/algorithm_rec_can_en.md index 4d7a64f99..da6c9c609 100644 --- a/doc/doc_en/algorithm_rec_can_en.md +++ b/doc/doc_en/algorithm_rec_can_en.md @@ -25,7 +25,7 @@ Using CROHME handwrittem mathematical expression recognition datasets for traini |Model|Backbone|config|exprate|Download link| | --- | --- | --- | --- | --- | -|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|coming soon| +|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)| ## 2. Environment @@ -53,14 +53,14 @@ Evaluation: ``` # GPU evaluation -python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/best_accuracy +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN ``` Prediction: ``` # The configuration file used for prediction must match the training -python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/imgs_hme/hme_01.jpg' Global.pretrained_model=./rec_d28_can_train/best_accuracy +python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN ``` @@ -68,16 +68,20 @@ python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.a ### 4.1 Python Inference -First, the model saved during the RobustScanner text recognition training process is converted into an inference model. you can use the following command to convert: +First, the model saved during the CAN handwritten mathematical expression recognition training process is converted into an inference model. you can use the following command to convert: ``` python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False + +# The default output max length of the model is 36. 
If you need to predict a longer sequence, please specify its output sequence as an appropriate value when exporting the model, as: Architecture.Head.max_text_length=72
```

-For RobustScanner text recognition model inference, the following commands can be executed:
+For CAN handwritten mathematical expression recognition model inference, the following commands can be executed:

```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_hme/hme_01.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_image_shape="1, 100, 100" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+python3 tools/infer/predict_rec.py --image_dir="./doc/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
+
+# If you need to predict on a picture with black characters on a white background, please set: --rec_image_inverse=False
```

diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index 1f3de63de..b602a346d 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -37,7 +37,6 @@ from ppocr.data.simple_dataset import SimpleDataSet
 from ppocr.data.lmdb_dataset import LMDBDataSet, LMDBDataSetSR
 from ppocr.data.pgnet_dataset import PGDataSet
 from ppocr.data.pubtab_dataset import PubTabDataSet
-from ppocr.data.hmer_dataset import HMERDataSet

 __all__ = ['build_dataloader', 'transform', 'create_operators']

@@ -56,7 +55,7 @@ def build_dataloader(config, mode, device, logger, seed=None):

     support_dict = [
         'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet',
-        'LMDBDataSetSR', 'HMERDataSet'
+        'LMDBDataSetSR'
     ]
     module_name = config[mode]['dataset']['name']
     assert module_name in support_dict, Exception(
diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
index fec1e895f..067b2158a 100644
--- a/ppocr/data/collate_fn.py
+++ b/ppocr/data/collate_fn.py
@@ -95,8 +95,8 @@ class DyMaskCollator(object):
                 1] > max_height else max_height
             max_width = item[0].shape[2] if item[0].shape[
                 2] > max_width else max_width
-            max_length = item[1].shape[0] if item[1].shape[
-                0] > max_length else max_length
+            max_length = len(item[1]) if len(item[
+                1]) > max_length else max_length
             proper_items.append(item)

         images, image_masks = np.zeros(
@@ -111,7 +111,7 @@ class DyMaskCollator(object):
             _, h, w = proper_items[i][0].shape
             images[i][:, :h, :w] = proper_items[i][0]
             image_masks[i][:, :h, :w] = 1
-            l = proper_items[i][1].shape[0]
+            l = len(proper_items[i][1])
             labels[i][:l] = proper_items[i][1]
             label_masks[i][:l] = 1

diff --git a/ppocr/data/hmer_dataset.py b/ppocr/data/hmer_dataset.py
deleted file mode 100644
index d5d92f264..000000000
--- a/ppocr/data/hmer_dataset.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
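The DyMaskCollator change above swaps `item[1].shape[0]` for `len(item[1])` because labels now reach the collator as plain Python lists produced by the new `SeqLabelEncode` op added later in this patch, not as NumPy arrays built by the `HMERDataSet` whose deleted listing continues below. A minimal, self-contained sketch of that list-based encoding; the tiny symbol dictionary and the sample line are assumptions for illustration, not the real latex_symbol_dict.txt or CROHME labels:

```python
# Stand-in for ppocr/utils/dict/latex_symbol_dict.txt (assumed, illustrative).
symbol_to_id = {"x": 0, "+": 1, "=": 2, "eos": 3}

def encode_label(label_line):
    # labels.txt is assumed to hold one sample per line: "<name>\t<space-separated symbols>".
    # Like SeqLabelEncode, split the sequence, append the end symbol, map to ids,
    # and skip any symbol missing from the dictionary.
    name, seq = label_line.strip().split("\t")
    tokens = seq.split() + ["eos"]
    ids = [symbol_to_id[t] for t in tokens if t in symbol_to_id]
    return name, ids

print(encode_label("hme_00\tx + x = x"))  # ('hme_00', [0, 1, 0, 2, 0, 3])
```

Moving the encoding into a label op lets the generic dataset loaders serve CROHME without a dedicated dataset class, which is why the file below can be deleted.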
-import os, json, random, traceback -import numpy as np - -from PIL import Image -from paddle.io import Dataset - -from .imaug import transform, create_operators - - -class HMERDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(HMERDataSet, self).__init__() - - self.logger = logger - self.seed = seed - self.mode = mode - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - self.data_dir = config[mode]['dataset']['data_dir'] - - label_file_list = dataset_config['label_file_list'] - data_source_num = len(label_file_list) - ratio_list = dataset_config.get("ratio_list", [1.0]) - - self.data_lines, self.labels = self.get_image_info_list(label_file_list, - ratio_list) - self.data_idx_order_list = list(range(len(self.data_lines))) - if self.mode == "train" and self.do_shuffle: - self.shuffle_data_random() - - if isinstance(ratio_list, (float, int)): - ratio_list = [float(ratio_list)] * int(data_source_num) - - assert len( - ratio_list - ) == data_source_num, "The length of ratio_list should be the same as the file_list." - - self.ops = create_operators(dataset_config['transforms'], global_config) - self.need_reset = True in [x < 1 for x in ratio_list] - - def get_image_info_list(self, file_list, ratio_list): - if isinstance(file_list, str): - file_list = [file_list] - labels = {} - for idx, file in enumerate(file_list): - with open(file, "r") as f: - lines = json.load(f) - labels.update(lines) - data_lines = [name for name in labels.keys()] - return data_lines, labels - - def shuffle_data_random(self): - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def __len__(self): - return len(self.data_idx_order_list) - - def __getitem__(self, idx): - file_idx = self.data_idx_order_list[idx] - data_name = self.data_lines[file_idx] - try: - file_name = data_name + '.jpg' - img_path = os.path.join(self.data_dir, file_name) - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(img_path, 'rb') as f: - img = f.read() - - label = self.labels.get(data_name).split() - label = np.array([int(item) for item in label]) - - data = {'image': img, 'label': label} - outs = transform(data, self.ops) - except: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - file_name, traceback.format_exc())) - outs = None - if outs is None: - # during evaluation, we should fix the idx to get same results for many times of evaluation. 
- rnd_idx = np.random.randint(self.__len__()) - return self.__getitem__(rnd_idx) - return outs diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 2a2ac2dec..ae916b2ee 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -1476,4 +1476,33 @@ class CTLabelEncode(object): data['polys'] = boxes data['texts'] = txts - return data \ No newline at end of file + return data + + +class SeqLabelEncode(BaseRecLabelEncode): + def __init__(self, + character_dict_path, + max_text_length=100, + use_space_char=False, + lower=True, + **kwargs): + super(SeqLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char, lower) + + def encode(self, text_seq): + text_seq_encoded = [] + for text in text_seq: + if text not in self.character: + continue + text_seq_encoded.append(self.dict.get(text)) + if len(text_seq_encoded) == 0: + return None + return text_seq_encoded + + def __call__(self, data): + label = data['label'] + if isinstance(label, str): + label = label.strip().split() + label.append(self.end_str) + data['label'] = self.encode(label) + return data diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml index aeaccb6b0..ac7b07712 100644 --- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml +++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml @@ -5,14 +5,14 @@ Global: print_batch_step: 10 save_model_dir: ./output/rec/can/ save_epoch_step: 1 - # evaluation is run every 1105 iterations + # evaluation is run every 1105 iterations (1 epoch)(batch_size = 8) eval_batch_step: [0, 1105] cal_metric_during_train: True - pretrained_model: ./output/rec/can/CAN - checkpoints: ./output/rec/can/CAN - save_inference_dir: ./inference/rec_d28_can/ + pretrained_model: + checkpoints: + save_inference_dir: use_visualdl: False - infer_img: doc/imgs_hme/hme_01.jpeg + infer_img: doc/datasets/crohme_demo/hme_00.jpg # for data or label process character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt max_text_length: 36 @@ -75,37 +75,43 @@ Metric: Train: dataset: - name: HMERDataSet - data_dir: ./train_data/CROHME/training/images/ + name: PGDataSet + data_dir: ./train_data/CROHME_lite/training/images/ transforms: - DecodeImage: channel_first: False - GrayImageChannelFormat: normalize: True inverse: True + - SeqLabelEncode: + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt + lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME/training/labels.json"] + label_file_list: ["./train_data/CROHME_lite/training/labels.txt"] loader: shuffle: True - batch_size_per_card: 2 - drop_last: True - num_workers: 1 + batch_size_per_card: 8 + drop_last: False + num_workers: 4 collate_fn: DyMaskCollator Eval: dataset: - name: HMERDataSet - data_dir: ./train_data/CROHME/evaluation/images/ + name: PGDataSet + data_dir: ./train_data/CROHME_lite/evaluation/images/ transforms: - DecodeImage: channel_first: False - GrayImageChannelFormat: normalize: True inverse: True + - SeqLabelEncode: + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt + lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME/evaluation/labels.json"] + label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"] loader: shuffle: False drop_last: False diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt index be50c5980..731d327cd 100644 --- 
a/test_tipc/configs/rec_d28_can/train_infer_python.txt +++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt @@ -1,6 +1,6 @@ ===========================train_params=========================== model_name:rec_d28_can -python:python +python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null @@ -9,7 +9,7 @@ Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=8 Global.pretrained_model:null train_model_name:latest -train_infer_img_dir:./doc/imgs_hme +train_infer_img_dir:./doc/datasets/crohme_demo null:null ## trainer:norm_train @@ -37,15 +37,15 @@ export2:null train_model:./inference/rec_d28_can_train/best_accuracy infer_export:tools/export_model.py -c test_tipc/configs/rec_d28_can/rec_d28_can.yml -o infer_quant:False -inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict/latex_symbol_dict.txt --rec_image_shape="1,100,100" --rec_algorithm="CAN" +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict/latex_symbol_dict.txt --rec_algorithm="CAN" --use_gpu:True|False --enable_mkldnn:False --cpu_threads:6 --rec_batch_num:1 --use_tensorrt:False --precision:fp32 ---rec_model_dir:./output/ ---image_dir:./doc/imgs_hme +--rec_model_dir: +--image_dir:./doc/datasets/crohme_demo --save_log_path:./test/output/ --benchmark:True null:null diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 5ca426e28..4aab17019 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -257,6 +257,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ fi + if [ ${model_name} == "rec_d28_can" ]; then + wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate + cd ./pretrain_models/ && tar xf can_train.tar && cd ../ + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate + cd ./train_data/ && tar xf CROHME_lite.tar && cd ../ + + fi if [ ${model_name} == "layoutxlm_ser" ]; then ${python_name} -m pip install -r ppstructure/kie/requirements.txt ${python_name} -m pip install opencv-python -U diff --git a/test_tipc/readme.md b/test_tipc/readme.md index 1442ee1c8..9f02c2e30 100644 --- a/test_tipc/readme.md +++ b/test_tipc/readme.md @@ -44,6 +44,7 @@ | SAST |det_r50_vd_sast_totaltext_v2.0 | 检测 | 支持 | 多机多卡
混合精度 | - | - |
| Rosetta|rec_mv3_none_none_ctc_v2.0 | 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
| Rosetta|rec_r34_vd_none_none_ctc_v2.0 | 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
+| CAN |rec_d28_can | 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
| CRNN |rec_mv3_none_bilstm_ctc_v2.0 | 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
| CRNN |rec_r34_vd_none_bilstm_ctc_v2.0| 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
| StarNet|rec_mv3_tps_bilstm_ctc_v2.0 | 识别 | 支持 | 多机多卡
混合精度 | - | - | diff --git a/tools/program.py b/tools/program.py index c491247a6..a0594e950 100755 --- a/tools/program.py +++ b/tools/program.py @@ -544,7 +544,7 @@ def eval(model, elif model_type in ['sr']: eval_class(preds, batch_numpy) elif model_type in ['can']: - eval_class(preds[0], batch_numpy[2:], epoch_reset=False) + eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0)) else: post_result = post_process_class(preds, batch_numpy[1]) eval_class(post_result, batch_numpy) From 9606bec16a7d7b8a1abcfa113a69d40f837b4cc5 Mon Sep 17 00:00:00 2001 From: andyjpaddle Date: Mon, 17 Oct 2022 07:41:36 +0000 Subject: [PATCH 07/20] fix visionlan default dict --- doc/doc_ch/algorithm_rec_visionlan.md | 2 +- doc/doc_en/algorithm_rec_visionlan_en.md | 2 +- ppocr/data/imaug/label_ops.py | 9 +++++++-- ppocr/postprocess/rec_postprocess.py | 10 ++++++++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/doc/doc_ch/algorithm_rec_visionlan.md b/doc/doc_ch/algorithm_rec_visionlan.md index df039491d..84b5ef682 100644 --- a/doc/doc_ch/algorithm_rec_visionlan.md +++ b/doc/doc_ch/algorithm_rec_visionlan.md @@ -139,7 +139,7 @@ Predicts of ./doc/imgs_words/en/word_2.png:('yourself', 0.9999493) ## 5. FAQ 1. MJSynth和SynthText两种数据集来自于[VisionLAN源repo](https://github.com/wangyuxin87/VisionLAN) 。 -2. 我们使用VisionLAN作者提供的预训练模型进行finetune训练。 +2. 我们使用VisionLAN作者提供的预训练模型进行finetune训练,预训练模型配套字典为'ppocr/utils/ic15_dict.txt'。 ## 引用 diff --git a/doc/doc_en/algorithm_rec_visionlan_en.md b/doc/doc_en/algorithm_rec_visionlan_en.md index 70c2ccc47..cf2293b3d 100644 --- a/doc/doc_en/algorithm_rec_visionlan_en.md +++ b/doc/doc_en/algorithm_rec_visionlan_en.md @@ -120,7 +120,7 @@ Not supported ## 5. FAQ 1. Note that the MJSynth and SynthText datasets come from [VisionLAN repo](https://github.com/wangyuxin87/VisionLAN). -2. We use the pre-trained model provided by the VisionLAN authors for finetune training. +2. We use the pre-trained model provided by the VisionLAN authors for finetune training. The dictionary for the pre-trained model is 'ppocr/utils/ic15_dict.txt'. 
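Since the pre-trained VisionLAN weights were trained with the ic15-style character order, the label_ops/rec_postprocess patch below guards the character remapping in `VLLabelEncode`/`VLLabelDecode` with a `use_default_dict` flag instead of applying it unconditionally. A standalone sketch of what that slicing does; the 36-character string is assumed here for illustration:

```python
# Effect of the guarded remap on the default digit-first character list:
# letters first, then digits 1-9, then '0', i.e. the order the VisionLAN
# pre-trained weights appear to assume.
chars = list("0123456789abcdefghijklmnopqrstuvwxyz")
remapped = chars[10:] + chars[1:10] + [chars[0]]
print("".join(remapped))  # abcdefghijklmnopqrstuvwxyz1234567890
```

With the guard in place, a user-supplied dictionary keeps its file order; only the default ic15-style dictionary is reordered.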
## Citation diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 2a2ac2dec..511471c76 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -107,6 +107,7 @@ class BaseRecLabelEncode(object): self.beg_str = "sos" self.end_str = "eos" self.lower = lower + self.use_default_dict = False if character_dict_path is None: logger = get_logger() @@ -116,8 +117,11 @@ class BaseRecLabelEncode(object): self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) self.lower = True + self.use_default_dict = True else: self.character_str = [] + if 'ppocr/utils/ic15_dict.txt' in character_dict_path: + self.use_default_dict = True with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: @@ -1400,8 +1404,9 @@ class VLLabelEncode(BaseRecLabelEncode): **kwargs): super(VLLabelEncode, self).__init__( max_text_length, character_dict_path, use_space_char, lower) - self.character = self.character[10:] + self.character[ - 1:10] + [self.character[0]] + if self.use_default_dict: + self.character = self.character[10:] + self.character[ + 1:10] + [self.character[0]] self.dict = {} for i, char in enumerate(self.character): self.dict[char] = i diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 59b5254e4..98753ef7a 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -26,10 +26,15 @@ class BaseRecLabelDecode(object): self.end_str = "eos" self.reverse = False self.character_str = [] + self.use_default_dict = False + if character_dict_path is None: self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) + self.use_default_dict = True else: + if 'ppocr/utils/ic15_dict.txt' in character_dict_path: + self.use_default_dict = True with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: @@ -805,8 +810,9 @@ class VLLabelDecode(BaseRecLabelDecode): super(VLLabelDecode, self).__init__(character_dict_path, use_space_char) self.max_text_length = kwargs.get('max_text_length', 25) self.nclass = len(self.character) + 1 - self.character = self.character[10:] + self.character[ - 1:10] + [self.character[0]] + if self.use_default_dict: + self.character = self.character[10:] + self.character[ + 1:10] + [self.character[0]] def decode(self, text_index, text_prob=None, is_remove_duplicate=False): """ convert text-index into text-label. 
""" From f875556541cf4374287257a3864fcf4fb9d6bcac Mon Sep 17 00:00:00 2001 From: dorren Date: Mon, 17 Oct 2022 17:18:14 +0800 Subject: [PATCH 08/20] update can transform method and add copyright info for new file --- configs/rec/rec_d28_can.yml | 23 +++++++------ ppocr/data/imaug/__init__.py | 2 +- ppocr/data/imaug/label_ops.py | 4 +-- ppocr/data/imaug/operators.py | 24 ++++++++++++++ ppocr/data/imaug/rec_img_aug.py | 30 ----------------- ppocr/losses/rec_can_loss.py | 18 +++++++++++ ppocr/modeling/backbones/rec_densenet.py | 32 +++++++++++-------- ppocr/modeling/heads/rec_can_head.py | 27 +++++++++++++++- ppocr/postprocess/__init__.py | 4 +-- ppocr/postprocess/rec_postprocess.py | 4 +-- test_tipc/configs/rec_d28_can/rec_d28_can.yml | 31 ++++++++++-------- .../rec_d28_can/train_infer_python.txt | 4 +-- test_tipc/prepare.sh | 1 - tools/infer/predict_rec.py | 2 +- 14 files changed, 129 insertions(+), 77 deletions(-) diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml index 9fe936ae1..2149100da 100644 --- a/configs/rec/rec_d28_can.yml +++ b/configs/rec/rec_d28_can.yml @@ -42,7 +42,6 @@ Architecture: bottleneck: True use_dropout: True input_channel: 1 - Head: name: CANHead in_channel: 684 @@ -66,8 +65,8 @@ Loss: name: CANLoss PostProcess: - name: SeqLabelDecode - character: 111 + name: CANLabelDecode + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt Metric: name: CANMetric @@ -75,15 +74,18 @@ Metric: Train: dataset: - name: PGDataSet + name: SimpleDataSet data_dir: ./train_data/CROHME/training/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: @@ -98,15 +100,18 @@ Train: Eval: dataset: - name: PGDataSet + name: SimpleDataSet data_dir: ./train_data/CROHME/evaluation/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py index a64092286..93d97446d 100644 --- a/ppocr/data/imaug/__init__.py +++ b/ppocr/data/imaug/__init__.py @@ -27,7 +27,7 @@ from .make_pse_gt import MakePseGt from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \ SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \ ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \ - RFLRecResizeImg, GrayImageChannelFormat + RFLRecResizeImg from .ssl_img_aug import SSLRotateResize from .randaugment import RandAugment from .copy_paste import CopyPaste diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index ae916b2ee..e1389639b 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -1479,14 +1479,14 @@ class CTLabelEncode(object): return data -class SeqLabelEncode(BaseRecLabelEncode): +class CANLabelEncode(BaseRecLabelEncode): def __init__(self, character_dict_path, max_text_length=100, use_space_char=False, lower=True, **kwargs): - super(SeqLabelEncode, self).__init__( + super(CANLabelEncode, self).__init__( max_text_length, character_dict_path, use_space_char, lower) def 
encode(self, text_seq): diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py index 5e84b1aac..4ff2d29ed 100644 --- a/ppocr/data/imaug/operators.py +++ b/ppocr/data/imaug/operators.py @@ -498,3 +498,27 @@ class ResizeNormalize(object): img_numpy = np.array(img).astype("float32") img_numpy = img_numpy.transpose((2, 0, 1)) / 255 return img_numpy + + +class GrayImageChannelFormat(object): + """ + format gray scale image's channel: (3,h,w) -> (1,h,w) + Args: + inverse: inverse gray image + """ + + def __init__(self, inverse=False, **kwargs): + self.inverse = inverse + + def __call__(self, data): + img = data['image'] + img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + img_expanded = np.expand_dims(img_single_channel, 0) + + if self.inverse: + data['image'] = np.abs(img_expanded - 1) + else: + data['image'] = img_expanded + + data['src_image'] = img + return data \ No newline at end of file diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py index bc7fbc604..e22153bde 100644 --- a/ppocr/data/imaug/rec_img_aug.py +++ b/ppocr/data/imaug/rec_img_aug.py @@ -465,36 +465,6 @@ class RobustScannerRecResizeImg(object): return data -class GrayImageChannelFormat(object): - """ - format gray scale image's channel: (3,h,w) -> (1,h,w) - Args: - normalize: True/False - when True convert image dynamic range [0,255]->[0,1] - inverse: inverse gray image - """ - - def __init__(self, normalize=True, inverse=False, **kwargs): - self.normalize = normalize - self.inverse = inverse - - def __call__(self, data): - img = data['image'] - img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - img_single_channel = np.expand_dims(img_single_channel, 0) - - if self.normalize: - img_single_channel = img_single_channel / 255.0 - - if self.inverse: - data['image'] = np.abs(img_single_channel - 1).astype('float32') - else: - data['image'] = img_single_channel.astype('float32') - - data['src_image'] = img - return data - - def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25): imgC, imgH, imgW_min, imgW_max = image_shape h = img.shape[0] diff --git a/ppocr/losses/rec_can_loss.py b/ppocr/losses/rec_can_loss.py index a6c655e0e..227e17f5e 100644 --- a/ppocr/losses/rec_can_loss.py +++ b/ppocr/losses/rec_can_loss.py @@ -1,3 +1,21 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/LBH1024/CAN/models/can.py +""" + import paddle import paddle.nn as nn import numpy as np diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py index d3391d408..b9fab765b 100644 --- a/ppocr/modeling/backbones/rec_densenet.py +++ b/ppocr/modeling/backbones/rec_densenet.py @@ -1,3 +1,21 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import math import paddle import paddle.nn as nn @@ -5,14 +23,6 @@ import paddle.nn.functional as F class Bottleneck(nn.Layer): - ''' - ratio: 16 - growthRate: 24 - reduction: 0.5 - bottleneck: True - use_dropout: True - ''' - def __init__(self, nChannels, growthRate, use_dropout): super(Bottleneck, self).__init__() interChannels = 4 * growthRate @@ -78,11 +88,7 @@ class DenseNet(nn.Layer): def __init__(self, growthRate, reduction, bottleneck, use_dropout, input_channel, **kwargs): super(DenseNet, self).__init__() - ''' - ratio: 16 - growthRate: 24 - reduction: 0.5 - ''' + nDenseBlocks = 16 nChannels = 2 * growthRate diff --git a/ppocr/modeling/heads/rec_can_head.py b/ppocr/modeling/heads/rec_can_head.py index afd78ee9d..732dbfe2d 100644 --- a/ppocr/modeling/heads/rec_can_head.py +++ b/ppocr/modeling/heads/rec_can_head.py @@ -1,4 +1,29 @@ -from turtle import forward +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +This code is refer from: +https://github.com/LBH1024/CAN/models/can.py +https://github.com/LBH1024/CAN/models/counting.py +https://github.com/LBH1024/CAN/models/decoder.py +https://github.com/LBH1024/CAN/models/attention.py + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import paddle.nn as nn import paddle import math diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index e86a7ea70..36a3152f2 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -37,7 +37,7 @@ from .table_postprocess import TableMasterLabelDecode, TableLabelDecode from .picodet_postprocess import PicoDetPostProcess from .ct_postprocess import CTPostProcess from .drrg_postprocess import DRRGPostprocess -from .rec_postprocess import SeqLabelDecode +from .rec_postprocess import CANLabelDecode def build_post_process(config, global_config=None): @@ -52,7 +52,7 @@ def build_post_process(config, global_config=None): 'TableMasterLabelDecode', 'SPINLabelDecode', 'DistillationSerPostProcess', 'DistillationRePostProcess', 'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess', - 'RFLLabelDecode', 'DRRGPostprocess', 'SeqLabelDecode' + 'RFLLabelDecode', 'DRRGPostprocess', 'CANLabelDecode' ] if config['name'] == 'PSEPostProcess': diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 4d88c278e..0664ac6d9 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -899,12 +899,12 @@ class VLLabelDecode(BaseRecLabelDecode): return text, label -class SeqLabelDecode(BaseRecLabelDecode): +class CANLabelDecode(BaseRecLabelDecode): """ Convert between latex-symbol and symbol-index """ def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): - super(SeqLabelDecode, self).__init__(character_dict_path, + super(CANLabelDecode, self).__init__(character_dict_path, use_space_char) def decode(self, text_index, preds_prob=None): diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml index ac7b07712..2149100da 100644 --- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml +++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml @@ -42,7 +42,6 @@ Architecture: bottleneck: True use_dropout: True input_channel: 1 - Head: name: CANHead in_channel: 684 @@ -66,8 +65,8 @@ Loss: name: CANLoss PostProcess: - name: SeqLabelDecode - character: 111 + name: CANLabelDecode + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt Metric: name: CANMetric @@ -75,20 +74,23 @@ Metric: Train: dataset: - name: PGDataSet - data_dir: ./train_data/CROHME_lite/training/images/ + name: SimpleDataSet + data_dir: ./train_data/CROHME/training/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME_lite/training/labels.txt"] + label_file_list: ["./train_data/CROHME/training/labels.txt"] loader: shuffle: True batch_size_per_card: 8 @@ -98,20 +100,23 @@ Train: Eval: dataset: - name: PGDataSet - data_dir: ./train_data/CROHME_lite/evaluation/images/ + name: SimpleDataSet + data_dir: ./train_data/CROHME/evaluation/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: 
[0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"] + label_file_list: ["./train_data/CROHME/evaluation/labels.txt"] loader: shuffle: False drop_last: False diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt index 731d327cd..1794e78cf 100644 --- a/test_tipc/configs/rec_d28_can/train_infer_python.txt +++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt @@ -1,7 +1,7 @@ ===========================train_params=========================== model_name:rec_d28_can -python:python3.7 -gpu_list:0|0,1 +python:python +gpu_list:0|0 Global.use_gpu:True|True Global.auto_cast:null Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240 diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 4aab17019..dc0d2fdb0 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -262,7 +262,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then cd ./pretrain_models/ && tar xf can_train.tar && cd ../ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate cd ./train_data/ && tar xf CROHME_lite.tar && cd ../ - fi if [ ${model_name} == "layoutxlm_ser" ]; then ${python_name} -m pip install -r ppstructure/kie/requirements.txt diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index c1604798e..b3ef557c0 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -111,7 +111,7 @@ class TextRecognizer(object): elif self.rec_algorithm == "CAN": self.inverse = args.rec_image_inverse postprocess_params = { - 'name': 'SeqLabelDecode', + 'name': 'CANLabelDecode', "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } From c44f3bc78ce777c8efc8ef10a344d434bbd73338 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Mon, 17 Oct 2022 09:51:32 +0000 Subject: [PATCH 09/20] add pse tipc --- test_tipc/prepare.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 5ca426e28..62cda1d8a 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -241,6 +241,9 @@ if [ ${MODE} = "lite_train_lite_infer" ];then if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then ${python_name} -m pip install paddleslim fi + if [ ${model_name} == "det_r50_vd_pse_v2_0" ]; then + wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate + fi if [ ${model_name} == "det_mv3_east_v2_0" ]; then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate cd ./pretrain_models/ && tar xf det_mv3_east_v2.0_train.tar && cd ../ From e410d2e1c902c747200e0231377c7daed54a8db7 Mon Sep 17 00:00:00 2001 From: dorren Date: Mon, 17 Oct 2022 19:16:02 +0800 Subject: [PATCH 10/20] update tipc config --- test_tipc/configs/rec_d28_can/rec_d28_can.yml | 8 ++++---- test_tipc/configs/rec_d28_can/train_infer_python.txt | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml index 2149100da..92917c20d 100644 --- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml +++ 
b/test_tipc/configs/rec_d28_can/rec_d28_can.yml @@ -75,7 +75,7 @@ Metric: Train: dataset: name: SimpleDataSet - data_dir: ./train_data/CROHME/training/images/ + data_dir: ./train_data/CROHME_lite/training/images/ transforms: - DecodeImage: channel_first: False @@ -90,7 +90,7 @@ Train: lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME/training/labels.txt"] + label_file_list: ["./train_data/CROHME_lite/training/labels.txt"] loader: shuffle: True batch_size_per_card: 8 @@ -101,7 +101,7 @@ Train: Eval: dataset: name: SimpleDataSet - data_dir: ./train_data/CROHME/evaluation/images/ + data_dir: ./train_data/CROHME_lite/evaluation/images/ transforms: - DecodeImage: channel_first: False @@ -116,7 +116,7 @@ Eval: lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME/evaluation/labels.txt"] + label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"] loader: shuffle: False drop_last: False diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt index 1794e78cf..731d327cd 100644 --- a/test_tipc/configs/rec_d28_can/train_infer_python.txt +++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt @@ -1,7 +1,7 @@ ===========================train_params=========================== model_name:rec_d28_can -python:python -gpu_list:0|0 +python:python3.7 +gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240 From ea557c50eef30dc2f8f5a76d9920877ed4e159c8 Mon Sep 17 00:00:00 2001 From: dorren Date: Mon, 17 Oct 2022 19:58:35 +0800 Subject: [PATCH 11/20] update config --- configs/rec/rec_d28_can.yml | 7 ++----- ppocr/modeling/backbones/rec_densenet.py | 5 +++++ test_tipc/configs/rec_d28_can/rec_d28_can.yml | 7 ++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml index 2149100da..7c3b0fd3d 100644 --- a/configs/rec/rec_d28_can.yml +++ b/configs/rec/rec_d28_can.yml @@ -66,7 +66,6 @@ Loss: PostProcess: name: CANLabelDecode - character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt Metric: name: CANMetric @@ -76,6 +75,7 @@ Train: dataset: name: SimpleDataSet data_dir: ./train_data/CROHME/training/images/ + label_file_list: ["./train_data/CROHME/training/labels.txt"] transforms: - DecodeImage: channel_first: False @@ -86,11 +86,9 @@ Train: - GrayImageChannelFormat: inverse: True - CANLabelEncode: - character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME/training/labels.txt"] loader: shuffle: True batch_size_per_card: 8 @@ -102,6 +100,7 @@ Eval: dataset: name: SimpleDataSet data_dir: ./train_data/CROHME/evaluation/images/ + label_file_list: ["./train_data/CROHME/evaluation/labels.txt"] transforms: - DecodeImage: channel_first: False @@ -112,11 +111,9 @@ Eval: - GrayImageChannelFormat: inverse: True - CANLabelEncode: - character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME/evaluation/labels.txt"] loader: shuffle: False drop_last: False diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py index b9fab765b..65c5fa4f2 100644 --- a/ppocr/modeling/backbones/rec_densenet.py +++ b/ppocr/modeling/backbones/rec_densenet.py @@ -11,6 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/LBH1024/CAN/models/densenet.py + +""" from __future__ import absolute_import from __future__ import division diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml index 92917c20d..550186586 100644 --- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml +++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml @@ -66,7 +66,6 @@ Loss: PostProcess: name: CANLabelDecode - character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt Metric: name: CANMetric @@ -76,6 +75,7 @@ Train: dataset: name: SimpleDataSet data_dir: ./train_data/CROHME_lite/training/images/ + label_file_list: ["./train_data/CROHME_lite/training/labels.txt"] transforms: - DecodeImage: channel_first: False @@ -86,11 +86,9 @@ Train: - GrayImageChannelFormat: inverse: True - CANLabelEncode: - character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME_lite/training/labels.txt"] loader: shuffle: True batch_size_per_card: 8 @@ -102,6 +100,7 @@ Eval: dataset: name: SimpleDataSet data_dir: ./train_data/CROHME_lite/evaluation/images/ + label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"] transforms: - DecodeImage: channel_first: False @@ -112,11 +111,9 @@ Eval: - GrayImageChannelFormat: inverse: True - CANLabelEncode: - character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"] loader: shuffle: False drop_last: False From 273068527f9c7217ff9af34df4a20dbd378b4841 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Tue, 18 Oct 2022 07:31:07 +0000 Subject: [PATCH 12/20] only in DRRG import DRRGHead --- ppocr/modeling/heads/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py index 63002140c..c203af016 100755 --- a/ppocr/modeling/heads/__init__.py +++ b/ppocr/modeling/heads/__init__.py @@ -24,7 +24,6 @@ def build_head(config): from .det_fce_head import FCEHead from .e2e_pg_head import PGHead from .det_ct_head import CT_Head - from .det_drrg_head import DRRGHead # rec head from .rec_ctc_head import CTCHead @@ -59,6 +58,10 @@ def build_head(config): 'DRRGHead' ] + if config['name'] == 'DRRGHead': + from .det_drrg_head import DRRGHead + support_dict.append('DRRGHead') + #table head module_name = config.pop('name') From 4078b0fee8c3df6ef223789731e348dea7462a49 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Tue, 18 Oct 2022 10:03:11 +0000 Subject: [PATCH 13/20] fix pact bug in slanet --- ppstructure/kie/requirements.txt | 2 +- ppstructure/table/predict_table.py | 18 +++++++++--------- .../layoutxlm_ser/train_pact_infer_python.txt | 4 ++-- .../configs/slanet/train_pact_infer_python.txt | 2 +- test_tipc/prepare.sh | 3 ++- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/ppstructure/kie/requirements.txt b/ppstructure/kie/requirements.txt index 11fa98da1..6cfcba764 100644 --- a/ppstructure/kie/requirements.txt +++ b/ppstructure/kie/requirements.txt @@ -4,4 +4,4 @@ seqeval pypandoc attrdict python_docx -https://paddleocr.bj.bcebos.com/ppstructure/whl/paddlenlp-2.3.0.dev0-py3-none-any.whl +paddlenlp>=2.4.1 diff --git 
a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py index aeec66dec..fdf611b7f 100644 --- a/ppstructure/table/predict_table.py +++ b/ppstructure/table/predict_table.py @@ -58,6 +58,7 @@ def expand(pix, det_box, shape): class TableSystem(object): def __init__(self, args, text_detector=None, text_recognizer=None): + self.args = args if not args.show_log: logger.setLevel(logging.INFO) @@ -99,13 +100,18 @@ class TableSystem(object): result = dict() time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0} start = time.time() - + if self.args.benchmark: + self.autolog.times.start() structure_res, elapse = self._structure(copy.deepcopy(img)) + if self.benchmark: + self.autolog.times.stamp() result['cell_bbox'] = structure_res[1].tolist() time_dict['table'] = elapse dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr( copy.deepcopy(img)) + if self.benchmark: + self.autolog.times.stamp() time_dict['det'] = det_elapse time_dict['rec'] = rec_elapse @@ -118,24 +124,18 @@ class TableSystem(object): toc = time.time() time_dict['match'] = toc - tic result['html'] = pred_html - if self.benchmark: - self.autolog.times.end(stamp=True) end = time.time() time_dict['all'] = end - start if self.benchmark: - self.autolog.times.stamp() + self.autolog.times.end(stamp=True) return result, time_dict def _structure(self, img): - if self.benchmark: - self.autolog.times.start() structure_res, elapse = self.table_structurer(copy.deepcopy(img)) return structure_res, elapse def _ocr(self, img): h, w = img.shape[:2] - if self.benchmark: - self.autolog.times.stamp() dt_boxes, det_elapse = self.text_detector(copy.deepcopy(img)) dt_boxes = sorted_boxes(dt_boxes) @@ -233,7 +233,7 @@ def main(args): f_html.close() if args.benchmark: - text_sys.autolog.report() + table_sys.autolog.report() if __name__ == "__main__": diff --git a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt index fbf2a8802..c19b4b73a 100644 --- a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt +++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt @@ -7,14 +7,14 @@ Global.auto_cast:fp32 Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8 -Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh +Architecture.Backbone.pretrained:pretrain_models/ser_LayoutXLM_xfun_zh train_model_name:latest train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg null:null ## trainer:pact_train norm_train:null -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.eval_batch_step=[2000,10] fpgm_train:null distill_train:null null:null diff --git a/test_tipc/configs/slanet/train_pact_infer_python.txt b/test_tipc/configs/slanet/train_pact_infer_python.txt index 42ed0cf59..98546afa6 100644 --- a/test_tipc/configs/slanet/train_pact_infer_python.txt +++ b/test_tipc/configs/slanet/train_pact_infer_python.txt @@ -34,7 +34,7 @@ distill_export:null export1:null export2:null ## -infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer +infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_infer infer_export:null infer_quant:True inference:ppstructure/table/predict_table.py 
--det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 62cda1d8a..da6dfecad 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then python_name=${array[0]} ${python_name} -m pip install -r requirements.txt ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl + ${python_name} -m pip install paddleslim==2.3.4 # pretrain lite train data wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate @@ -260,7 +261,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ fi - if [ ${model_name} == "layoutxlm_ser" ]; then + if [[ ${model_name} =~ "layoutxlm_ser" ]]; then ${python_name} -m pip install -r ppstructure/kie/requirements.txt ${python_name} -m pip install opencv-python -U wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate From 4cf04cbee88acb173706c6ea8ad59a07e1bf2ecb Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Wed, 19 Oct 2022 04:02:01 +0000 Subject: [PATCH 14/20] fix table recogition benckmark error --- ppstructure/table/predict_structure.py | 32 +++++++++++++++++++ ppstructure/table/predict_table.py | 44 +++++--------------------- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py index 0bf100852..08e381a84 100755 --- a/ppstructure/table/predict_structure.py +++ b/ppstructure/table/predict_structure.py @@ -68,6 +68,7 @@ def build_pre_process_list(args): class TableStructurer(object): def __init__(self, args): + self.args = args self.use_onnx = args.use_onnx pre_process_list = build_pre_process_list(args) if args.table_algorithm not in ['TableMaster']: @@ -89,8 +90,31 @@ class TableStructurer(object): self.predictor, self.input_tensor, self.output_tensors, self.config = \ utility.create_predictor(args, 'table', logger) + if args.benchmark: + import auto_log + pid = os.getpid() + gpu_id = utility.get_infer_gpuid() + self.autolog = auto_log.AutoLogger( + model_name="table", + model_precision=args.precision, + batch_size=1, + data_shape="dynamic", + save_path=None, #args.save_log_path, + inference_config=self.config, + pids=pid, + process_name=None, + gpu_ids=gpu_id if args.use_gpu else None, + time_keys=[ + 'preprocess_time', 'inference_time', 'postprocess_time' + ], + warmup=0, + logger=logger) + def __call__(self, img): starttime = time.time() + if self.args.benchmark: + self.autolog.times.start() + ori_im = img.copy() data = {'image': img} data = transform(data, self.preprocess_op) @@ -99,6 +123,8 @@ class TableStructurer(object): return None, 0 img = np.expand_dims(img, axis=0) img = img.copy() + if self.args.benchmark: 
+            self.autolog.times.stamp()
         if self.use_onnx:
             input_dict = {}
             input_dict[self.input_tensor.name] = img
             outputs = self.predictor.run(self.output_tensors, input_dict)
         else:
             self.input_tensor.copy_from_cpu(img)
             self.predictor.run()
             outputs = []
             for output_tensor in self.output_tensors:
                 output = output_tensor.copy_to_cpu()
                 outputs.append(output)
+            if self.args.benchmark:
+                self.autolog.times.stamp()

         preds = {}
         preds['structure_probs'] = outputs[1]
@@ -125,6 +153,8 @@
             '<html>', '<body>', '<table>'
         ] + structure_str_list + ['</table>', '</body>', '</html>']
         elapse = time.time() - starttime
+        if self.args.benchmark:
+            self.autolog.times.end(stamp=True)
         return (structure_str_list, bbox_list), elapse


@@ -164,6 +194,8 @@ def main(args):
         total_time += elapse
         count += 1
         logger.info("Predict time of {}: {}".format(image_file, elapse))
+    if args.benchmark:
+        table_structurer.autolog.report()


 if __name__ == "__main__":
diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py
index fdf611b7f..8f9c71749 100644
--- a/ppstructure/table/predict_table.py
+++ b/ppstructure/table/predict_table.py
@@ -14,7 +14,6 @@

 import os
 import sys
-import subprocess

 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
@@ -61,57 +60,31 @@ class TableSystem(object):
         self.args = args
         if not args.show_log:
             logger.setLevel(logging.INFO)
-
-        self.text_detector = predict_det.TextDetector(
-            args) if text_detector is None else text_detector
-        self.text_recognizer = predict_rec.TextRecognizer(
-            args) if text_recognizer is None else text_recognizer
-
+        args.benchmark = False
+        self.text_detector = predict_det.TextDetector(copy.deepcopy(
+            args)) if text_detector is None else text_detector
+        self.text_recognizer = predict_rec.TextRecognizer(copy.deepcopy(
+            args)) if text_recognizer is None else text_recognizer
+        args.benchmark = True
         self.table_structurer = predict_strture.TableStructurer(args)
         if args.table_algorithm in ['TableMaster']:
             self.match = TableMasterMatcher()
         else:
             self.match = TableMatch(filter_ocr_result=True)

-        self.benchmark = args.benchmark
         self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
             args, 'table', logger)
-        if args.benchmark:
-            import auto_log
-            pid = os.getpid()
-            gpu_id = utility.get_infer_gpuid()
-            self.autolog = auto_log.AutoLogger(
-                model_name="table",
-                model_precision=args.precision,
-                batch_size=1,
-                data_shape="dynamic",
-                save_path=None,  #args.save_log_path,
-                inference_config=self.config,
-                pids=pid,
-                process_name=None,
-                gpu_ids=gpu_id if args.use_gpu else None,
-                time_keys=[
-                    'preprocess_time', 'inference_time', 'postprocess_time'
-                ],
-                warmup=0,
-                logger=logger)

     def __call__(self, img, return_ocr_result_in_table=False):
         result = dict()
         time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0}
         start = time.time()
-        if self.args.benchmark:
-            self.autolog.times.start()
         structure_res, elapse = self._structure(copy.deepcopy(img))
-        if self.benchmark:
-            self.autolog.times.stamp()
         result['cell_bbox'] = structure_res[1].tolist()
         time_dict['table'] = elapse

         dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr(
             copy.deepcopy(img))
-        if self.benchmark:
-            self.autolog.times.stamp()
         time_dict['det'] = det_elapse
         time_dict['rec'] = rec_elapse

@@ -126,8 +99,6 @@ class TableSystem(object):
         result['html'] = pred_html
         end = time.time()
         time_dict['all'] = end - start
-        if self.benchmark:
-            self.autolog.times.end(stamp=True)
         return result, time_dict

     def _structure(self, img):
@@ -233,12 +204,13 @@ def main(args):
         f_html.close()

     if args.benchmark:
-        table_sys.autolog.report()
+        table_sys.table_structurer.autolog.report()


 if __name__ == "__main__":
     args = parse_args()
     if args.use_mp:
+        import subprocess
         p_list = []
         total_process_num = args.total_process_num
         for process_id in range(total_process_num):

From ae9388ef574400e416bdbf18a1111d932a29a257 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Wed, 19 Oct 2022 07:03:42 +0000
Subject: [PATCH 15/20] update paddleslim version

---
 test_tipc/prepare.sh | 2 +-
 1 file changed, 1
From ae9388ef574400e416bdbf18a1111d932a29a257 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Wed, 19 Oct 2022 07:03:42 +0000
Subject: [PATCH 15/20] update paddleslim version

---
 test_tipc/prepare.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index da6dfecad..177857794 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,7 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     python_name=${array[0]}
     ${python_name} -m pip install -r requirements.txt
     ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
-    ${python_name} -m pip install paddleslim==2.3.4
+    ${python_name} -m pip install paddleslim
     # pretrain lite train data
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate

From 0f58f37c160eb2487d7ebdb4595133c72b068978 Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 16:41:00 +0800
Subject: [PATCH 16/20] update prepare.sh

---
 doc/doc_ch/algorithm_rec_can.md | 2 +-
 test_tipc/prepare.sh            | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md
index 8a012b490..53e29535b 100644
--- a/doc/doc_ch/algorithm_rec_can.md
+++ b/doc/doc_ch/algorithm_rec_can.md
@@ -141,7 +141,7 @@ Predicts of ./doc/imgs_hme/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _
 ### 4.2 C++推理部署

-由于C++预处理后处理还未支持CAN,所以暂未支持
+由于C++预处理后处理还未支持CAN,所以暂未支持

 ### 4.3 Serving服务化部署

diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index dc0d2fdb0..d8e6a2fc4 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     python_name=${array[0]}
     ${python_name} -m pip install -r requirements.txt
     ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
+    ${python_name} -m pip install paddleslim==2.3.4
     # pretrain lite train data
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
@@ -241,6 +242,9 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then
         ${python_name} -m pip install paddleslim
     fi
+    if [ ${model_name} == "det_r50_vd_pse_v2_0" ]; then
+        wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
+    fi
     if [ ${model_name} == "det_mv3_east_v2_0" ]; then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf det_mv3_east_v2.0_train.tar && cd ../
@@ -263,7 +267,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
         wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
         cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
     fi
-    if [ ${model_name} == "layoutxlm_ser" ]; then
+    if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
         ${python_name} -m pip install -r ppstructure/kie/requirements.txt
         ${python_name} -m pip install opencv-python -U
         wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
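One detail worth noting in the prepare.sh hunk above is the switch from `==` to `=~` for the layoutxlm_ser branch. Inside `[[ ... ]]`, a quoted right-hand side of `=~` is matched as a literal substring, so model names that merely contain `layoutxlm_ser` also take the branch. A hedged sketch of the difference; the variant name is illustrative, not taken from the patch:

```bash
# Exact test vs. substring test in bash, as changed in prepare.sh above.
model_name="layoutxlm_ser_KL"   # illustrative variant name

if [ "${model_name}" == "layoutxlm_ser" ]; then
    echo "exact match"           # not reached for the variant name
fi

if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
    echo "substring match"       # reached: the quoted pattern is literal
fi
```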
From b22102d4c54ccaac7a79169ed431c5026b00d12e Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 16:46:27 +0800
Subject: [PATCH 17/20] update prepare.sh again

---
 test_tipc/prepare.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index d8e6a2fc4..cdc69f25a 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -261,12 +261,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
     fi
-    if [ ${model_name} == "rec_d28_can" ]; then
-        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate
-        cd ./pretrain_models/ && tar xf can_train.tar && cd ../
-        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
-        cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
-    fi
     if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
         ${python_name} -m pip install -r ppstructure/kie/requirements.txt
         ${python_name} -m pip install opencv-python -U
@@ -293,6 +287,12 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
         wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/ct_tipc/total_text_lite2.tar --no-check-certificate
         cd ./train_data && tar xf total_text_lite2.tar && ln -s total_text_lite2 total_text && cd ../
     fi
+    if [ ${model_name} == "rec_d28_can" ]; then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf can_train.tar && cd ../
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate
+        cd ./train_data/ && tar xf CROHME_lite.tar && cd ../
+    fi

 elif [ ${MODE} = "whole_train_whole_infer" ];then
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate

From db8e6f94af899d0ef472e6277ecbb0cb2c37d68b Mon Sep 17 00:00:00 2001
From: dorren
Date: Wed, 19 Oct 2022 17:04:19 +0800
Subject: [PATCH 18/20] update prepare.sh and readme

---
 doc/doc_ch/algorithm_rec_can.md | 2 +-
 test_tipc/prepare.sh            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/doc_ch/algorithm_rec_can.md b/doc/doc_ch/algorithm_rec_can.md
index 53e29535b..4f266cb33 100644
--- a/doc/doc_ch/algorithm_rec_can.md
+++ b/doc/doc_ch/algorithm_rec_can.md
@@ -55,7 +55,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs
 ```
 **注意:**
-- 我们提供的数据集,即`CROHME数据集`将手写公式存储为黑底白字的格式,若您自行准备的数据集与之相反,即以白底黑字模式存储,请在训练时做出如下修改
+- 我们提供的数据集,即[`CROHME数据集`](https://paddleocr.bj.bcebos.com/dataset/CROHME.tar)将手写公式存储为黑底白字的格式,若您自行准备的数据集与之相反,即以白底黑字模式存储,请在训练时做出如下修改

 ```
 python3 tools/train.py -c configs/rec/rec_d28_can.yml -o Train.dataset.transforms.GrayImageChannelFormat.inverse=False
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index cdc69f25a..9291ce8b9 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,7 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     python_name=${array[0]}
     ${python_name} -m pip install -r requirements.txt
     ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
-    ${python_name} -m pip install paddleslim==2.3.4
+    ${python_name} -m pip install paddleslim
     # pretrain lite train data
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     wget -nc -P
./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate From 38290a2853dcc1b08513c8f5ed7055b8cc708705 Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Wed, 19 Oct 2022 17:12:12 +0800 Subject: [PATCH 19/20] update visionlan (#7989) --- doc/doc_ch/algorithm_overview.md | 2 +- doc/doc_ch/algorithm_rec_visionlan.md | 4 ++-- doc/doc_en/algorithm_overview_en.md | 2 +- doc/doc_en/algorithm_rec_visionlan_en.md | 4 ++-- ppocr/data/imaug/label_ops.py | 7 ------- ppocr/postprocess/rec_postprocess.py | 7 ------- 6 files changed, 6 insertions(+), 20 deletions(-) diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 235763d8a..44c1e117e 100755 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -102,7 +102,7 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型,**欢迎广 |SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) | |ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar) | |ABINet|Resnet45| 90.75% | rec_r45_abinet | [训练模型](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) | -|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [训练模型](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar) | +|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [训练模型](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) | |SPIN|ResNet32| 90.00% | rec_r32_gaspin_bilstm_att | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) | |RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)| |RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) | diff --git a/doc/doc_ch/algorithm_rec_visionlan.md b/doc/doc_ch/algorithm_rec_visionlan.md index 84b5ef682..b4474c29f 100644 --- a/doc/doc_ch/algorithm_rec_visionlan.md +++ b/doc/doc_ch/algorithm_rec_visionlan.md @@ -27,7 +27,7 @@ |模型|骨干网络|配置文件|Acc|下载链接| | --- | --- | --- | --- | --- | -|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[预训练、训练模型](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)| +|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[预训练、训练模型](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)| ## 2. 
环境配置 @@ -80,7 +80,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_r45_visionlan.yml -o Global.infer_ ### 4.1 Python推理 -首先将训练得到best模型,转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)),可以使用如下命令进行转换: +首先将训练得到best模型,转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)),可以使用如下命令进行转换: ```shell # 注意将pretrained_model的路径设置为本地路径。 diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index ff84b9a68..2614226e0 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -99,7 +99,7 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r |SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) | |ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) | |ABINet|Resnet45| 90.75% | rec_r45_abinet | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) | -|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar) | +|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) | |SPIN|ResNet32| 90.00% | rec_r32_gaspin_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) | |RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)| |RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) | diff --git a/doc/doc_en/algorithm_rec_visionlan_en.md b/doc/doc_en/algorithm_rec_visionlan_en.md index cf2293b3d..f67aa3c62 100644 --- a/doc/doc_en/algorithm_rec_visionlan_en.md +++ b/doc/doc_en/algorithm_rec_visionlan_en.md @@ -25,7 +25,7 @@ Using MJSynth and SynthText two text recognition datasets for training, and eval |Model|Backbone|config|Acc|Download link| | --- | --- | --- | --- | --- | -|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[预训练、训练模型](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)| +|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[预训练、训练模型](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)| ## 2. Environment @@ -68,7 +68,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_r45_visionlan.yml -o Global.infer_ ### 4.1 Python Inference -First, the model saved during the VisionLAN text recognition training process is converted into an inference model. ( [Model download link](https://paddleocr.bj.bcebos.com/rec_r45_visionlan_train.tar)) ), you can use the following command to convert: +First, the model saved during the VisionLAN text recognition training process is converted into an inference model. 
([Model download link](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)), you can use the following command to convert:

 ```
 python3 tools/export_model.py -c configs/rec/rec_r45_visionlan.yml -o Global.pretrained_model=./rec_r45_visionlan_train/best_accuracy Global.save_inference_dir=./inference/rec_r45_visionlan/
 ```

diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 511471c76..e8a3fedaf 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -107,7 +107,6 @@ class BaseRecLabelEncode(object):
         self.beg_str = "sos"
         self.end_str = "eos"
         self.lower = lower
-        self.use_default_dict = False

         if character_dict_path is None:
             logger = get_logger()
@@ -117,11 +116,8 @@ class BaseRecLabelEncode(object):
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
             self.lower = True
-            self.use_default_dict = True
         else:
             self.character_str = []
-            if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
-                self.use_default_dict = True
             with open(character_dict_path, "rb") as fin:
                 lines = fin.readlines()
                 for line in lines:
@@ -1404,9 +1400,6 @@ class VLLabelEncode(BaseRecLabelEncode):
                  **kwargs):
         super(VLLabelEncode, self).__init__(
             max_text_length, character_dict_path, use_space_char, lower)
-        if self.use_default_dict:
-            self.character = self.character[10:] + self.character[
-                1:10] + [self.character[0]]
         self.dict = {}
         for i, char in enumerate(self.character):
             self.dict[char] = i
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index 98753ef7a..2ec572e8d 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -26,15 +26,11 @@ class BaseRecLabelDecode(object):
         self.end_str = "eos"
         self.reverse = False
         self.character_str = []
-        self.use_default_dict = False

         if character_dict_path is None:
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-            self.use_default_dict = True
         else:
-            if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
-                self.use_default_dict = True
             with open(character_dict_path, "rb") as fin:
                 lines = fin.readlines()
                 for line in lines:
@@ -810,9 +806,6 @@ class VLLabelDecode(BaseRecLabelDecode):
         super(VLLabelDecode, self).__init__(character_dict_path, use_space_char)
         self.max_text_length = kwargs.get('max_text_length', 25)
         self.nclass = len(self.character) + 1
-        if self.use_default_dict:
-            self.character = self.character[10:] + self.character[
-                1:10] + [self.character[0]]

     def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
         """ convert text-index into text-label.
         """
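The `use_default_dict` removal above deletes a character-order remap that VLLabelEncode and VLLabelDecode applied whenever the default lowercase dictionary was in use; the weights re-exported under the new `VisionLAN/` URL presumably no longer require it. A sketch of what the deleted slice expression actually did, reconstructed from the removed lines for illustration only:

```python
# What the deleted remap did with the default 36-character dictionary:
# rotate the digits behind the letters and move '0' to the very end.
character = list("0123456789abcdefghijklmnopqrstuvwxyz")

# the removed expression: character[10:] + character[1:10] + [character[0]]
remapped = character[10:] + character[1:10] + [character[0]]

print("".join(character))  # 0123456789abcdefghijklmnopqrstuvwxyz
print("".join(remapped))   # abcdefghijklmnopqrstuvwxyz1234567890
```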
""" From 969f4c52fc6a8182713bd25e46e298a6c151d357 Mon Sep 17 00:00:00 2001 From: dorren Date: Wed, 19 Oct 2022 18:04:02 +0800 Subject: [PATCH 20/20] update prepare.sh --- test_tipc/prepare.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 9291ce8b9..23d312c86 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -288,8 +288,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then cd ./train_data && tar xf total_text_lite2.tar && ln -s total_text_lite2 total_text && cd ../ fi if [ ${model_name} == "rec_d28_can" ]; then - wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/can_train.tar --no-check-certificate - cd ./pretrain_models/ && tar xf can_train.tar && cd ../ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate cd ./train_data/ && tar xf CROHME_lite.tar && cd ../ fi