PaddleOCR/configs/rec/PP-FormuaNet/PP-FormulaNet-S.yaml

118 lines
2.9 KiB
YAML

# Run-wide settings: device, schedule, logging, output paths, and the shared
# anchors (&rec_char_dict_path, &max_new_tokens, &input_size) that later
# sections reference through aliases.
Global:
  model_name: PP-FormulaNet-S # To use static model for inference.
  use_gpu: True
  epoch_num: 20
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/rec/pp_formulanet_s/
  save_epoch_step: 2
  # Evaluation is run every 179 iterations; the original note equates this to
  # 1 epoch at batch_size = 56, with max_seq_len: 1024.
  # NOTE(review): 56 is presumably 4 cards x Train.loader.batch_size_per_card
  # (14) -- confirm, and recompute 179 for a different dataset or card count.
  eval_batch_step: [0, 179]
  cal_metric_during_train: True
  # The next three keys are intentionally left empty (they parse as null).
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/datasets/pme_demo/0000013.png
  infer_mode: False
  use_space_char: False
  # Anchored values: reused below via *rec_char_dict_path, *max_new_tokens
  # and *input_size, so edit them here only.
  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
  max_new_tokens: &max_new_tokens 1024
  input_size: &input_size [384, 384]
  save_res_path: ./output/rec/predicts_pp_formulanet_s.txt
  allow_resize_largeImg: False
  start_ema: True
  # NOTE(review): the leading 1 here differs from Architecture.in_channels (3);
  # presumably a single-channel export shape -- confirm before changing either.
  d2s_train_image_shape: [1, 384, 384]
# Optimizer selection and its hyper-parameters; the learning-rate schedule is
# a nested mapping under `lr` with its own `name`.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  lr:
    name: LinearWarmupCosine
    learning_rate: 0.0001
# Model definition: a recognition model made of an (unset) Transform, a
# Backbone and a Head, each sub-mapping carrying its own `name`.
Architecture:
  model_type: rec
  algorithm: PP-FormulaNet-S
  in_channels: 3
  # Left empty (null): no transform stage is configured.
  Transform:
  Backbone:
    name: PPHGNetV2_B4_Formula
    class_num: 1024
  Head:
    name: PPFormulaNet_Head
    # Alias of Global.max_new_tokens (1024).
    max_new_tokens: *max_new_tokens
    decoder_start_token_id: 0
    decoder_ffn_dim: 1536
    decoder_hidden_size: 384
    decoder_layers: 2
    # NOTE(review): temperature / top_p presumably only matter when
    # do_sample is True; it is False here -- confirm.
    temperature: 0.2
    do_sample: False
    top_p: 0.95
    encoder_hidden_size: 2048
    is_export: False
    length_aware: True
    use_parallel: True
    # Same value as Loss.parallel_step (3).
    # NOTE(review): presumably the two must be kept in sync -- confirm.
    parallel_step: 3
# Training loss selection.
Loss:
  name: PPFormulaNet_S_Loss
  # Same value as Architecture.Head.parallel_step (3).
  # NOTE(review): presumably the two must be kept in sync -- confirm.
  parallel_step: 3
# Decoder applied to model output; uses the dictionary path anchored in
# Global.rec_char_dict_path.
PostProcess:
  name: UniMERNetDecode
  rec_char_dict_path: *rec_char_dict_path
# Evaluation metric; `exp_rate` is declared as the main indicator.
Metric:
  name: LaTeXOCRMetric
  main_indicator: exp_rate
  cal_bleu_score: True
# Training data pipeline: dataset location, the ordered transform list, and
# the data-loader settings.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./ocr_rec_latexocr_dataset_example
    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
    # Transforms are a sequence, listed in the order given here. Items with
    # no nested keys carry a null value (no arguments configured).
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTrainTransform:
      - LatexImageFormat:
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_new_tokens
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']
  loader:
    # NOTE(review): shuffle is False for the training set -- confirm this is
    # intended rather than carried over from the Eval loader.
    shuffle: False
    drop_last: False
    batch_size_per_card: 14
    num_workers: 0
    collate_fn: UniMERNetCollator
# Evaluation data pipeline: same dataset directory as Train but reading
# val.txt, using the test-time transform, and additionally keeping 'filename'.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./ocr_rec_latexocr_dataset_example
    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
    # Transforms are a sequence, listed in the order given here. Items with
    # no nested keys carry a null value (no arguments configured).
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTestTransform:
      - LatexImageFormat:
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_new_tokens
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask', 'filename']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 30
    num_workers: 0
    collate_fn: UniMERNetCollator