# Source: PaddleOCR/configs/rec/rec_unimernet.yml
# Snapshot: 2025-03-08 00:24:37 +08:00 (115 lines, 2.8 KiB, YAML)

# Global: run-wide settings (device, schedule, output paths) plus the anchors
# that other sections of this file reference through aliases.
Global:
  use_gpu: True
  epoch_num: 40
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/rec/unimernet/
  save_epoch_step: 5
  # evaluation is run every 37880 iterations after the 0th iteration
  eval_batch_step: [0, 37880]
  cal_metric_during_train: True
  # The next three keys are deliberately left without a value (they load as null).
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/datasets/pme_demo/0000013.png
  infer_mode: False
  use_space_char: False
  # Anchors: &rec_char_dict_path, &input_size and &max_seq_len are reused by
  # the PostProcess, Train and Eval sections via *aliases, so edit them here.
  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
  input_size: &input_size [192, 672]
  max_seq_len: &max_seq_len 1024
  save_res_path: ./output/rec/predicts_unimernet.txt
  allow_resize_largeImg: False
  # NOTE(review): presumably [channels, height, width], with the last two
  # values mirroring input_size — confirm against the export code.
  d2s_train_image_shape: [1, 192, 672]
# Optimizer: AdamW hyper-parameters and the learning-rate schedule.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  # Learning-rate schedule; the keys below are the schedule's own parameters.
  lr:
    name: LinearWarmupCosine
    # Floats are written with an explicit mantissa dot (1.0e-4, not 1e-4):
    # YAML 1.1 loaders such as PyYAML resolve the dot-less form as a string.
    learning_rate: 1.0e-4
    start_lr: 1.0e-5
    min_lr: 1.0e-8
    warmup_steps: 5000
# Architecture: model definition, split into Transform / Backbone / Head.
Architecture:
  model_type: rec
  algorithm: UniMERNet
  in_channels: 3
  # Transform has no value (loads as null); Backbone and Head are its siblings.
  Transform:
  Backbone:
    name: DonutSwinModel
    hidden_size: 1024
    num_layers: 4
    # One entry per layer (num_layers is 4).
    num_heads: [4, 8, 16, 32]
    add_pooling_layer: True
    use_mask_token: False
  Head:
    name: UniMERNetHead
    max_new_tokens: 1536
    decoder_start_token_id: 0
    temperature: 0.2
    do_sample: False
    top_p: 0.95
    # Same value as Backbone.hidden_size above.
    encoder_hidden_size: 1024
    is_export: False
    length_aware: True
# Loss: selects the loss by name; no further parameters are set here.
Loss:
  name: UniMERNetLoss
# PostProcess: decoder selected by name, using the dictionary path anchored
# as &rec_char_dict_path in the Global section.
PostProcess:
  name: UniMERNetDecode
  rec_char_dict_path: *rec_char_dict_path
# Metric: evaluation metric and the indicator designated as the main one.
Metric:
  name: LaTeXOCRMetric
  main_indicator: exp_rate
  cal_bleu_score: True
# Train: training dataset, its transform pipeline, and the data loader.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/UniMERNet/
    label_file_list: ["./train_data/UniMERNet/train_unimernet_1M.txt"]
    # Each list item is a single-key mapping {OperatorName: params}; an item
    # with no value takes no parameters from this file.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTrainTransform:
      - UniMERNetImageFormat:
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_seq_len
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']
  loader:
    # NOTE(review): shuffle is False for the training split — confirm this is
    # intentional.
    shuffle: False
    drop_last: False
    batch_size_per_card: 7
    num_workers: 0
    collate_fn: UniMERNetCollator
# Eval: evaluation dataset, its transform pipeline, and the data loader.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/UniMERNet/UniMER-Test/cpe
    label_file_list: ["./train_data/UniMERNet/test_unimernet_cpe.txt"]
    # Same pipeline layout as Train, with UniMERNetTestTransform in place of
    # UniMERNetTrainTransform.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTestTransform:
      - UniMERNetImageFormat:
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_seq_len
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 30
    num_workers: 0
    collate_fn: UniMERNetCollator