# Text-recognition config: CPPD algorithm on an SVTRNet backbone.
# Output locations below use the `svtr_cppd_base_ch` name.

Global:
  use_gpu: true
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec/svtr_cppd_base_ch/
  save_epoch_step: 10
  # Evaluate every 2000 iterations, counting from iteration 0.
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true
  # The next three entries are intentionally unset.
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_words_en/word_10.png
  # Settings used for data / label processing.
  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
  max_text_length: 25
  infer_mode: false
  use_space_char: false
  save_res_path: ./output/rec/predicts_svtr_cppd_base_ch.txt

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.99
  epsilon: 1.e-8
  weight_decay: 0.05
  # Space-separated names, kept as one plain string.
  no_weight_decay_name: norm pos_embed char_node_embed pos_node_embed char_pos_embed vis_pos_embed
  one_dim_param_no_weight_decay: true
  lr:
    name: Cosine
    # Original annotation on this value: "4gpus 128bs".
    # NOTE(review): both loaders below set batch_size_per_card to 256 --
    # confirm which batch size this learning rate was tuned for.
    learning_rate: 0.0005
    warmup_epoch: 5

Architecture:
  model_type: rec
  algorithm: CPPD
  # No transform stage is configured.
  Transform: null
  Backbone:
    name: SVTRNet
    img_size: [32, 256]
    patch_merging: Conv
    embed_dim: [128, 256, 384]
    depth: [6, 6, 4]
    num_heads: [4, 8, 12]
    # 8 Conv entries followed by 10 Global entries (18 in total).
    # NOTE(review): depth sums to 16 -- confirm how the backbone treats
    # the surplus mixer entries.
    mixer:
      - Conv
      - Conv
      - Conv
      - Conv
      - Conv
      - Conv
      - Conv
      - Conv
      - Global
      - Global
      - Global
      - Global
      - Global
      - Global
      - Global
      - Global
      - Global
      - Global
    local_mixer:
      - [5, 5]
      - [5, 5]
      - [5, 5]
    last_stage: false
    prenorm: true
  Head:
    name: CPPDHead
    dim: 384
    vis_seq: 128
    # Anchored so the CPPDLabelEncode transforms reuse the same flag.
    ch: &ch true

Loss:
  name: CPPDLoss
  # Must be greater than the number of character classes.
  # Anchored so the CPPDLabelEncode transforms reuse the same value.
  ignore_index: &ignore_index 7000
  smoothing: true
  sideloss_weight: 1.0

PostProcess:
  name: CPPDLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training/
    transforms:
      # Load the image.
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      # Label handling; both values come from the anchors defined above.
      - CPPDLabelEncode:
          ignore_index: *ignore_index
          ch: *ch
      - SVTRRecResizeImg:
          image_shape: [3, 32, 256]
          padding: true
      # The dataloader returns a list in this order.
      - KeepKeys:
          keep_keys:
            - image
            - label
            - label_node
            - label_index
            - length
  loader:
    shuffle: true
    batch_size_per_card: 256
    drop_last: true
    num_workers: 8

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation/
    transforms:
      # Load the image.
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      # Label handling; both values come from the anchors defined above.
      - CPPDLabelEncode:
          ignore_index: *ignore_index
          ch: *ch
      - SVTRRecResizeImg:
          image_shape: [3, 32, 256]
          padding: true
      # The dataloader returns a list in this order.
      - KeepKeys:
          keep_keys:
            - image
            - label
            - label_node
            - label_index
            - length
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    num_workers: 2