commit 37331199dd
@@ -14,7 +14,6 @@ Global:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -14,7 +14,6 @@ Global:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -14,7 +14,6 @@ Global:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25
infer_mode: False
use_space_char: True

@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25
infer_mode: False
use_space_char: True

@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/arabic_dict.txt
character_type: arabic
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
character_type: cyrillic
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/devanagari_dict.txt
character_type: devanagari
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
character_type: EN
max_text_length: 25
infer_mode: False
use_space_char: True

@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/french_dict.txt
character_type: french
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/german_dict.txt
character_type: german
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/japan_dict.txt
character_type: japan
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -16,7 +16,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict/korean_dict.txt
character_type: korean
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -15,7 +15,6 @@ Global:
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/latin_dict.txt
character_type: latin
max_text_length: 25
infer_mode: false
use_space_char: true

@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
character_type: EN
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: EN_symbol
character_dict_path: ppocr/utils/EN_symbol_dict.txt
max_text_length: 25
infer_mode: False
use_space_char: True

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: en
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: en
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
character_type: en
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: en
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -15,7 +15,6 @@ Global:
infer_img:
# for data or label process
character_dict_path: ppocr/utils/dict90.txt
character_type: EN_symbol
max_text_length: 30
infer_mode: False
use_space_char: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: en
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: en
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
character_type: en
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: en
character_dict_path:
max_text_length: 25
infer_mode: False
use_space_char: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path:
character_type: en
character_dict_path:
max_text_length: 25
num_heads: 8
infer_mode: False

@@ -14,8 +14,7 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path:
character_type: EN_symbol
character_dict_path: ppocr/utils/EN_symbol_dict.txt
max_text_length: 100
infer_mode: False
use_space_char: False
@@ -37,10 +37,9 @@
| checkpoints | 加载模型参数路径 | None | 用于中断后加载参数继续训练 |
| use_visualdl | 设置是否启用visualdl进行可视化log展示 | False | [教程地址](https://www.paddlepaddle.org.cn/paddle/visualdl) |
| infer_img | 设置预测图像路径或文件夹路径 | ./infer_img | \|
| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | \ |
| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | 如果为空,则默认使用小写字母+数字作为字典 |
| max_text_length | 设置文本最大长度 | 25 | \ |
| character_type | 设置字符类型 | ch | en/ch, en时将使用默认dict,ch时使用自定义dict|
| use_space_char | 设置是否识别空格 | True | 仅在 character_type=ch 时支持空格 |
| use_space_char | 设置是否识别空格 | True | |
| label_list | 设置方向分类器支持的角度 | ['0','180'] | 仅在方向分类器中生效 |
| save_res_path | 设置检测模型的结果保存地址 | ./output/det_db/predicts_db.txt | 仅在检测模型中生效 |

@@ -177,7 +176,7 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
--dict {path/of/dict} \ # 字典文件路径
-o Global.use_gpu=False # 是否使用gpu
...

```

意大利文由拉丁字母组成,因此执行完命令后会得到名为 rec_latin_lite_train.yml 的配置文件。

@@ -191,38 +190,37 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi
use_gpu: True
epoch_num: 500
...
character_type: it # 需要识别的语种
character_dict_path: {path/of/dict} # 字典文件所在路径

Train:
dataset:
name: SimpleDataSet
data_dir: train_data/ # 数据存放根目录
label_file_list: ["./train_data/train_list.txt"] # 训练集label路径
...

Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/ # 数据存放根目录
label_file_list: ["./train_data/val_list.txt"] # 验证集label路径
...

```

目前PaddleOCR支持的多语言算法有:

| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari |
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 |

更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)

@@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G
CRNN 文本识别模型推理,可以执行如下命令:

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
```

![](../imgs_words_en/word_336.png)

@@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)

- 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。

- 字符列表,DTRB论文中实验只是针对26个小写英文本母和10个数字进行实验,总共36个字符。所有大小字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典.因此在推理时需要设置参数rec_char_type,指定为英文"en"。
- 字符列表,DTRB论文中实验只是针对26个小写英文本母和10个数字进行实验,总共36个字符。所有大小字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典.因此在推理时需要设置参数rec_char_dict_path,指定为英文字典"./ppocr/utils/ic15_dict.txt"。

```
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"

@@ -303,15 +303,15 @@ dict_character = list(self.character_str)
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
--rec_model_dir="./inference/srn/" \
--rec_image_shape="1, 64, 256" \
--rec_char_type="en" \
--rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
--rec_algorithm="SRN"
```

### 4. 自定义文本识别字典的推理
如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径,并且设置 `rec_char_type=ch`
如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
```

<a name="多语言模型的推理"></a>

@@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别:

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
```
![](../imgs_results/multi_lang/korean_0.jpg)

@@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de
下面给出基于EAST文本检测和STAR-Net文本识别执行命令:

```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
```

执行命令后,识别结果图像如下:
@@ -159,7 +159,6 @@ PaddleOCR内置了一部分字典,可以按需使用。
- 自定义字典

如需自定义dic文件,请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。
并将 `character_type` 设置为 `ch`。

<a name="支持空格"></a>
### 1.4 添加空格类别

@@ -246,8 +245,6 @@ Global:
...
# 添加自定义字典,如修改字典请将路径指向新字典
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
# 修改字符类型
character_type: ch
...
# 识别空格
use_space_char: True

@@ -311,18 +308,18 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi

按语系划分,目前PaddleOCR支持的语种有:

| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari |
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 |

更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
@@ -1,4 +1,4 @@
# Configuration
# Configuration

- [1. Optional Parameter List](#1-optional-parameter-list)
- [2. Intorduction to Global Parameters of Configuration File](#2-intorduction-to-global-parameters-of-configuration-file)

@@ -37,9 +37,8 @@ Take rec_chinese_lite_train_v2.0.yml as an example
| checkpoints | set model parameter path | None | Used to load parameters after interruption to continue training|
| use_visualdl | Set whether to enable visualdl for visual log display | False | [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) |
| infer_img | Set inference image path or folder path | ./infer_img | \|
| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | \ |
| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | If the character_dict_path is None, model can only recognize number and lower letters |
| max_text_length | Set the maximum length of text | 25 | \ |
| character_type | Set character type | ch | en/ch, the default dict will be used for en, and the custom dict will be used for ch |
| use_space_char | Set whether to recognize spaces | True | Only support in character_type=ch mode |
| label_list | Set the angle supported by the direction classifier | ['0','180'] | Only valid in angle classifier model |
| save_res_path | Set the save address of the test model results | ./output/det_db/predicts_db.txt | Only valid in the text detection model |

@@ -196,40 +195,39 @@ Italian is made up of Latin letters, so after executing the command, you will ge
use_gpu: True
epoch_num: 500
...
character_type: it # language
character_dict_path: {path/of/dict} # path of dict

Train:
dataset:
name: SimpleDataSet
data_dir: train_data/ # root directory of training data
label_file_list: ["./train_data/train_list.txt"] # train label path
...

Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/ # root directory of val data
label_file_list: ["./train_data/val_list.txt"] # val label path
...

```

Currently, the multi-language algorithms supported by PaddleOCR are:

| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | ar |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari |
| Configuration file | Algorithm name | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari |

For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
@@ -21,7 +21,7 @@ Next, we first introduce how to convert a trained model into an inference model,
- [2.2 DB Text Detection Model Inference](#DB_DETECTION)
- [2.3 East Text Detection Model Inference](#EAST_DETECTION)
- [2.4 Sast Text Detection Model Inference](#SAST_DETECTION)

- [3. Text Recognition Model Inference](#RECOGNITION_MODEL_INFERENCE)
- [3.1 Lightweight Chinese Text Recognition Model Reference](#LIGHTWEIGHT_RECOGNITION)
- [3.2 CTC-Based Text Recognition Model Inference](#CTC-BASED_RECOGNITION)

@@ -281,7 +281,7 @@ python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o G
For CRNN text recognition model inference, execute the following commands:

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
```

![](../imgs_words_en/word_336.png)

@@ -314,7 +314,7 @@ with the training, such as: --rec_image_shape="1, 64, 256"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
--rec_model_dir="./inference/srn/" \
--rec_image_shape="1, 64, 256" \
--rec_char_type="en" \
--rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
--rec_algorithm="SRN"
```

@@ -323,7 +323,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch`

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
```

<a name="MULTILINGUAL_MODEL_INFERENCE"></a>

@@ -333,7 +333,7 @@ If you need to predict other language models, when using inference model predict
You need to specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition:

```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
```
![](../imgs_results/multi_lang/korean_0.jpg)

@@ -399,7 +399,7 @@ If you want to try other detection algorithms or recognition algorithms, please
The following command uses the combination of the EAST text detection and STAR-Net text recognition:

```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
```

After executing the command, the recognition result image is as follows:
@@ -161,7 +161,7 @@ The current multi-language model is still in the demo stage and will continue to
If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) and we will thank you in the Repo.

To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` and set `character_type` to `ch`.
To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` .

- Custom dictionary

@@ -172,8 +172,6 @@ If you need to customize dic file, please add character_dict_path field in confi

If you want to support the recognition of the `space` category, please set the `use_space_char` field in the yml file to `True`.

**Note: use_space_char only takes effect when character_type=ch**

<a name="TRAINING"></a>
## 2.Training

@@ -250,7 +248,6 @@ Global:
# Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
# Modify character type
character_type: ch
...
# Whether to recognize spaces
use_space_char: True

@@ -312,18 +309,18 @@ Eval:

Currently, the multi-language algorithms supported by PaddleOCR are:

| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | ar |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari |
| Configuration file | Algorithm name | backbone | trans | seq | pred | language |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional |
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean |
| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin |
| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic |
| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic |
| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari |

For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)

@@ -471,6 +468,3 @@ inference/det_db/
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path"
```
@@ -21,6 +21,8 @@ import numpy as np
import string
import json

from ppocr.utils.logging import get_logger

class ClsLabelEncode(object):
def __init__(self, label_list, **kwargs):

@@ -92,31 +94,23 @@ class BaseRecLabelEncode(object):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)

self.max_text_len = max_text_length
self.beg_str = "sos"
self.end_str = "eos"
if character_type == "en":
self.lower = False

if character_dict_path is None:
logger = get_logger()
logger.warning(
"The character_dict_path is None, model can only recognize number and lower letters"
)
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
elif character_type == "EN_symbol":
# same with ASTER setting (use 94 char).
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:
self.lower = True
else:
self.character_str = ""
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type)
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:

@@ -125,7 +119,6 @@ class BaseRecLabelEncode(object):
if use_space_char:
self.character_str += " "
dict_character = list(self.character_str)
self.character_type = character_type
dict_character = self.add_special_char(dict_character)
self.dict = {}
for i, char in enumerate(dict_character):

@@ -147,7 +140,7 @@ class BaseRecLabelEncode(object):
"""
if len(text) == 0 or len(text) > self.max_text_len:
return None
if self.character_type == "en":
if self.lower:
text = text.lower()
text_list = []
for char in text:

@@ -167,13 +160,11 @@ class NRTRLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='EN_symbol',
use_space_char=False,
**kwargs):

super(NRTRLabelEncode,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(NRTRLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)

def __call__(self, data):
text = data['label']

@@ -200,12 +191,10 @@ class CTCLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs):
super(CTCLabelEncode,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(CTCLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)

def __call__(self, data):
text = data['label']

@@ -231,12 +220,10 @@ class E2ELabelEncodeTest(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='EN',
use_space_char=False,
**kwargs):
super(E2ELabelEncodeTest,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(E2ELabelEncodeTest, self).__init__(
max_text_length, character_dict_path, use_space_char)

def __call__(self, data):
import json

@@ -305,12 +292,10 @@ class AttnLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs):
super(AttnLabelEncode,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(AttnLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)

def add_special_char(self, dict_character):
self.beg_str = "sos"

@@ -353,12 +338,10 @@ class SEEDLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs):
super(SEEDLabelEncode,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(SEEDLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)

def add_special_char(self, dict_character):
self.end_str = "eos"

@@ -385,12 +368,10 @@ class SRNLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length=25,
character_dict_path=None,
character_type='en',
use_space_char=False,
**kwargs):
super(SRNLabelEncode,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(SRNLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)

def add_special_char(self, dict_character):
dict_character = dict_character + [self.beg_str, self.end_str]

@@ -598,12 +579,10 @@ class SARLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs):
super(SARLabelEncode,
self).__init__(max_text_length, character_dict_path,
character_type, use_space_char)
super(SARLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)

def add_special_char(self, dict_character):
beg_end_str = "<BOS/EOS>"
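A minimal usage sketch, not part of this commit's diff: after the refactor above, the label encoders take only the text length, the dictionary path, and the space flag, and the character set is driven by `character_dict_path` alone. Paths and the sample label below are illustrative.

```python
from ppocr.data.imaug.label_ops import CTCLabelEncode

# With character_dict_path=None the encoder logs a warning and falls back to
# digits plus lower-case letters; here we pass an explicit dictionary instead.
encoder = CTCLabelEncode(
    max_text_length=25,
    character_dict_path="ppocr/utils/ppocr_keys_v1.txt",
    use_space_char=True)

sample = encoder({"label": "paddle"})
# sample["label"] is now an index array padded to max_text_length;
# the call returns None if the label is empty or longer than max_text_length.
```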
@@ -87,17 +87,17 @@ class RecResizeImg(object):
def __init__(self,
image_shape,
infer_mode=False,
character_type='ch',
character_dict_path='./ppocr/utils/ppocr_keys_v1.txt',
padding=True,
**kwargs):
self.image_shape = image_shape
self.infer_mode = infer_mode
self.character_type = character_type
self.character_dict_path = character_dict_path
self.padding = padding

def __call__(self, data):
img = data['image']
if self.infer_mode and self.character_type == "ch":
if self.infer_mode and self.character_dict_path is not None:
norm_img = resize_norm_img_chinese(img, self.image_shape)
else:
norm_img = resize_norm_img(img, self.image_shape, self.padding)
@@ -21,33 +21,15 @@ import re
class BaseRecLabelDecode(object):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)

def __init__(self, character_dict_path=None, use_space_char=False):
self.beg_str = "sos"
self.end_str = "eos"

if character_type == "en":
self.character_str = []
if character_dict_path is None:
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
elif character_type == "EN_symbol":
# same with ASTER setting (use 94 char).
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:
self.character_str = []
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type)
else:
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:

@@ -57,9 +39,6 @@ class BaseRecLabelDecode(object):
self.character_str.append(" ")
dict_character = list(self.character_str)

else:
raise NotImplementedError
self.character_type = character_type
dict_character = self.add_special_char(dict_character)
self.dict = {}
for i, char in enumerate(dict_character):

@@ -102,13 +81,10 @@ class BaseRecLabelDecode(object):
class CTCLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='ch',
use_space_char=False,
def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
super(CTCLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
use_space_char)

def __call__(self, preds, label=None, *args, **kwargs):
if isinstance(preds, tuple):

@@ -136,13 +112,12 @@ class DistillationCTCLabelDecode(CTCLabelDecode):

def __init__(self,
character_dict_path=None,
character_type='ch',
use_space_char=False,
model_name=["student"],
key=None,
**kwargs):
super(DistillationCTCLabelDecode, self).__init__(
character_dict_path, character_type, use_space_char)
super(DistillationCTCLabelDecode, self).__init__(character_dict_path,
use_space_char)
if not isinstance(model_name, list):
model_name = [model_name]
self.model_name = model_name

@@ -162,13 +137,9 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
class NRTRLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='EN_symbol',
use_space_char=True,
**kwargs):
def __init__(self, character_dict_path=None, use_space_char=True, **kwargs):
super(NRTRLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
use_space_char)

def __call__(self, preds, label=None, *args, **kwargs):

@@ -230,13 +201,10 @@ class NRTRLabelDecode(BaseRecLabelDecode):
class AttnLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='ch',
use_space_char=False,
def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
super(AttnLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
use_space_char)

def add_special_char(self, dict_character):
self.beg_str = "sos"

@@ -313,13 +281,10 @@ class AttnLabelDecode(BaseRecLabelDecode):
class SEEDLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='ch',
use_space_char=False,
def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
super(SEEDLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
use_space_char)

def add_special_char(self, dict_character):
self.beg_str = "sos"

@@ -394,13 +359,10 @@ class SEEDLabelDecode(BaseRecLabelDecode):
class SRNLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='en',
use_space_char=False,
def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
super(SRNLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
use_space_char)
self.max_text_length = kwargs.get('max_text_length', 25)

def __call__(self, preds, label=None, *args, **kwargs):

@@ -616,13 +578,10 @@ class TableLabelDecode(object):
class SARLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """

def __init__(self,
character_dict_path=None,
character_type='ch',
use_space_char=False,
def __init__(self, character_dict_path=None, use_space_char=False,
**kwargs):
super(SARLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
use_space_char)

self.rm_symbol = kwargs.get('rm_symbol', False)
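A matching sketch for the decode side, again illustrative rather than part of the diff: the decoders now take only `character_dict_path` and `use_space_char`, mirroring the constructor changes shown above. The class count of 37 below is an assumption (36 fallback characters plus the CTC blank).

```python
import numpy as np
from ppocr.postprocess.rec_postprocess import CTCLabelDecode

# character_dict_path=None falls back to "0123456789abcdefghijklmnopqrstuvwxyz";
# CTC prepends a blank class, so the prediction tensor needs 37 columns here.
decoder = CTCLabelDecode(character_dict_path=None, use_space_char=False)
preds = np.random.rand(1, 25, 37).astype("float32")  # [batch, seq_len, num_classes]
print(decoder(preds))  # list of (text, confidence); random input, so the text is noise
```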
@@ -0,0 +1,94 @@
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
:
;
<
=
>
?
@
[
\
]
^
_
`
{
|
}
~
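For reference, a hedged illustration of how such a dictionary file is consumed, one character per line; the helper name is made up, but the loop mirrors the `readlines` code in `BaseRecLabelEncode` above.

```python
def load_char_dict(character_dict_path):
    # Read the dict file one character per line, as the encoder/decoder classes do.
    characters = []
    with open(character_dict_path, "rb") as fin:
        for line in fin.readlines():
            characters.append(line.decode("utf-8").strip("\n").strip("\r\n"))
    return characters

# e.g. load_char_dict("ppocr/utils/en_dict.txt") -> ['0', '1', ..., '}', '~']
```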
@@ -131,14 +131,9 @@ def main(args):
img_list.append(img)
try:
img_list, cls_res, predict_time = text_classifier(img_list)
except:
except Exception as E:
logger.info(traceback.format_exc())
logger.info(
"ERROR!!!! \n"
"Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n"
"If your model has tps module: "
"TPS does not support variable shape.\n"
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
logger.info(E)
exit()
for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
@@ -38,40 +38,34 @@ logger = get_logger()
class TextRecognizer(object):
def __init__(self, args):
self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm
postprocess_params = {
'name': 'CTCLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
if self.rec_algorithm == "SRN":
postprocess_params = {
'name': 'SRNLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
elif self.rec_algorithm == "RARE":
postprocess_params = {
'name': 'AttnLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
elif self.rec_algorithm == 'NRTR':
postprocess_params = {
'name': 'NRTRLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
elif self.rec_algorithm == "SAR":
postprocess_params = {
'name': 'SARLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
@@ -74,7 +74,6 @@ def init_args():
parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument(