# PP-FormulaNet-S configuration (source listing: 118 lines, 2.9 KiB, YAML).
# Global run settings.
# The anchors &rec_char_dict_path, &max_new_tokens and &input_size defined in
# this section are referenced through aliases by later sections of this file.
Global:
  model_name: PP-FormulaNet-S  # To use static model for inference.
  use_gpu: True
  epoch_num: 20
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/rec/pp_formulanet_s/
  save_epoch_step: 2
  # evaluation is run every 179 iterations (1 epoch)(batch_size = 56)
  # max_seq_len: 1024
  # NOTE(review): batch_size = 56 presumably means 4 cards x
  # Train.loader.batch_size_per_card (14) - confirm before changing either.
  eval_batch_step: [0, 179]
  cal_metric_during_train: True
  # The next three keys are intentionally left empty (they load as null).
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/datasets/pme_demo/0000013.png
  infer_mode: False
  use_space_char: False
  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
  max_new_tokens: &max_new_tokens 1024
  input_size: &input_size [384, 384]
  save_res_path: ./output/rec/predicts_pp_formulanet_s.txt
  allow_resize_largeImg: False
  start_ema: True
  d2s_train_image_shape: [1, 384, 384]

# Optimizer settings; the learning-rate schedule is the nested `lr` mapping.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  lr:
    # `name` here names the schedule, not the optimizer; it must stay nested
    # under `lr` or it would duplicate Optimizer.name.
    name: LinearWarmupCosine
    learning_rate: 0.0001

# Model definition: a backbone plus a head; `Transform` is left empty (null).
Architecture:
  model_type: rec
  algorithm: PP-FormulaNet-S
  in_channels: 3
  Transform:
  Backbone:
    name: PPHGNetV2_B4_Formula
    class_num: 1024

  Head:
    name: PPFormulaNet_Head
    # Alias of Global.max_new_tokens (1024).
    max_new_tokens: *max_new_tokens
    decoder_start_token_id: 0
    decoder_ffn_dim: 1536
    decoder_hidden_size: 384
    decoder_layers: 2
    temperature: 0.2
    do_sample: False
    top_p: 0.95
    encoder_hidden_size: 2048
    is_export: False
    length_aware: True
    use_parallel: True
    # NOTE(review): Loss.parallel_step is also 3 in this file; presumably the
    # two values are meant to stay equal - confirm.
    parallel_step: 3

# Loss settings.
Loss:
  name: PPFormulaNet_S_Loss
  # NOTE(review): Architecture.Head.parallel_step is also 3 in this file;
  # presumably the two values are meant to stay equal - confirm.
  parallel_step: 3

# Post-processing settings.
PostProcess:
  name: UniMERNetDecode
  # Alias of Global.rec_char_dict_path.
  rec_char_dict_path: *rec_char_dict_path

# Evaluation metric settings.
Metric:
  name: LaTeXOCRMetric
  main_indicator: exp_rate
  cal_bleu_score: True

# Training data: dataset definition, per-sample transform list, and loader.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./ocr_rec_latexocr_dataset_example
    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
    # Each list item is a single-key mapping: operator name -> its arguments.
    # Operators written with an empty value take no arguments from this file.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTrainTransform:
      - LatexImageFormat:
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_new_tokens
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']

  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 14
    num_workers: 0
    collate_fn: UniMERNetCollator

# Evaluation data: dataset definition, per-sample transform list, and loader.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./ocr_rec_latexocr_dataset_example
    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
    # Each list item is a single-key mapping: operator name -> its arguments.
    # Operators written with an empty value take no arguments from this file.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTestTransform:
      - LatexImageFormat:
      - UniMERNetLabelEncode:
          max_seq_len: *max_new_tokens
          rec_char_dict_path: *rec_char_dict_path
      - KeepKeys:
          # Unlike the Train list, 'filename' is also kept here.
          keep_keys: ['image', 'label', 'attention_mask', 'filename']

  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 30
    num_workers: 0
    collate_fn: UniMERNetCollator