PaddleOCR/configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml

124 lines
3.2 KiB
YAML
Raw Normal View History

Global:
use_gpu: True
epoch_num: &epoch_num 130
log_smooth_window: 10
print_batch_step: 10
save_model_dir: ./output/re_layoutxlm_xfund_zh
save_epoch_step: 2000
# evaluation is run every 19 iterations after the 0th iteration (see eval_batch_step below)
2022-01-05 22:35:21 +08:00
eval_batch_step: [ 0, 19 ]
cal_metric_during_train: False
save_inference_dir:
use_visualdl: False
2022-01-07 12:56:45 +08:00
seed: 2022
infer_img: ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path: ./output/re_layoutxlm_xfund_zh/res/
Architecture:
model_type: kie
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
name: LayoutXLMForRe
2022-01-06 11:35:30 +08:00
pretrained: True
2022-02-12 15:17:38 +08:00
checkpoints:
Loss:
name: LossFromOutput
key: loss
reduction: mean
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
clip_norm: 10
lr:
2022-02-12 21:35:13 +08:00
learning_rate: 0.00005
warmup_epoch: 10
regularizer:
2022-01-05 22:49:04 +08:00
name: L2
factor: 0.00000
PostProcess:
name: VQAReTokenLayoutLMPostProcess
Metric:
name: VQAReTokenMetric
main_indicator: hmean
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/XFUND/zh_train/image
label_file_list:
- train_data/XFUND/zh_train/train.json
ratio_list: [ 1.0 ]
transforms:
- DecodeImage: # load image
img_mode: RGB
channel_first: False
- VQATokenLabelEncode: # class that handles the labels
contains_re: True
algorithm: *algorithm
class_path: &class_path train_data/XFUND/class_list_xfun.txt
- VQATokenPad:
max_seq_len: &max_seq_len 512
return_attention_mask: True
- VQAReTokenRelation:
- VQAReTokenChunk:
max_seq_len: *max_seq_len
2022-09-20 22:13:27 +08:00
- TensorizeEntitiesRelations:
- Resize:
size: [224,224]
- NormalizeImage:
scale: 1
mean: [ 123.675, 116.28, 103.53 ]
std: [ 58.395, 57.12, 57.375 ]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order
loader:
shuffle: True
drop_last: False
batch_size_per_card: 2
2022-02-12 15:17:38 +08:00
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/XFUND/zh_val/image
label_file_list:
- train_data/XFUND/zh_val/val.json
transforms:
- DecodeImage: # load image
img_mode: RGB
channel_first: False
- VQATokenLabelEncode: # class that handles the labels
contains_re: True
algorithm: *algorithm
class_path: *class_path
- VQATokenPad:
max_seq_len: *max_seq_len
return_attention_mask: True
- VQAReTokenRelation:
- VQAReTokenChunk:
max_seq_len: *max_seq_len
2022-09-20 22:13:27 +08:00
- TensorizeEntitiesRelations:
- Resize:
size: [224,224]
- NormalizeImage:
scale: 1
mean: [ 123.675, 116.28, 103.53 ]
std: [ 58.395, 57.12, 57.375 ]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 8
2022-02-12 15:17:38 +08:00
num_workers: 8