PaddleOCR/configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml

# Run-wide settings: device, schedule length, logging cadence, and output paths.
Global:
  use_gpu: true
  # Total number of training epochs; anchored so other sections can alias it.
  epoch_num: &epoch_num 130
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/re_layoutxlm_xfund_zh
  # NOTE(review): exceeds epoch_num (130) — presumably periodic per-epoch
  # checkpoints are effectively off; confirm this is intended.
  save_epoch_step: 2000
  # evaluation is run every 19 iterations after the 0th iteration
  eval_batch_step: [0, 19]
  cal_metric_during_train: false
  # Intentionally unset (null).
  save_inference_dir: null
  use_visualdl: false
  seed: 2022
  infer_img: ppstructure/docs/kie/input/zh_val_21.jpg
  save_res_path: ./output/re_layoutxlm_xfund_zh/res/

# Model definition: KIE model type with a LayoutXLM backbone.
Architecture:
  model_type: "kie"
  # Anchored here; the VQATokenLabelEncode transforms alias it as *algorithm.
  algorithm: &algorithm LayoutXLM
  # No transform stage is configured (null).
  Transform: null
  Backbone:
    name: "LayoutXLMForRe"
    pretrained: true
    # No checkpoint path is set (null).
    checkpoints: null

# Loss settings.
# NOTE(review): presumably the loss value is read from the model output under
# `key` and reduced with `reduction` — confirm against the loss implementation.
Loss:
  name: "LossFromOutput"
  key: "loss"
  reduction: "mean"

# Optimizer settings, with nested learning-rate and regularizer sections.
Optimizer:
  name: "AdamW"
  beta1: 0.9
  beta2: 0.999
  clip_norm: 10
  # Learning-rate schedule parameters.
  lr:
    learning_rate: 0.00005
    warmup_epoch: 10
  regularizer:
    name: "L2"
    # Factor is zero.
    # NOTE(review): presumably this disables the L2 penalty — confirm.
    factor: 0.00000
    
# Post-processing step applied to model output, selected by name.
PostProcess:
  name: "VQAReTokenLayoutLMPostProcess"

# Evaluation metric, selected by name; `main_indicator` names the headline score.
Metric:
  name: "VQAReTokenMetric"
  main_indicator: "hmean"

# Training split: dataset location, ordered transform pipeline, and loader settings.
Train:
  dataset:
    name: "SimpleDataSet"
    data_dir: train_data/XFUND/zh_train/image
    label_file_list:
      - train_data/XFUND/zh_train/train.json
    # One ratio per entry in label_file_list.
    ratio_list: [1.0]
    # Transforms are listed in the order they appear; do not reorder.
    transforms:
      # load image
      - DecodeImage:
          img_mode: "RGB"
          channel_first: false
      # Class handling label
      - VQATokenLabelEncode:
          contains_re: true
          algorithm: *algorithm
          # Anchored here; the Eval section aliases it as *class_path.
          class_path: &class_path train_data/XFUND/class_list_xfun.txt
      - VQATokenPad:
          # Anchored here; reused below and in Eval as *max_seq_len.
          max_seq_len: &max_seq_len 512
          return_attention_mask: true
      # Transform with no parameters (null).
      - VQAReTokenRelation: null
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
      # Transform with no parameters (null).
      - TensorizeEntitiesRelations: null
      - Resize:
          size: [224, 224]
      - NormalizeImage:
          scale: 1
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          order: "hwc"
      # Transform with no parameters (null).
      - ToCHWImage: null
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - entities
            - relations
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 2
    num_workers: 8

# Evaluation split: dataset location, ordered transform pipeline, and loader settings.
Eval:
  dataset:
    name: "SimpleDataSet"
    data_dir: train_data/XFUND/zh_val/image
    label_file_list:
      - train_data/XFUND/zh_val/val.json
    # Transforms are listed in the order they appear; do not reorder.
    transforms:
      # load image
      - DecodeImage:
          img_mode: "RGB"
          channel_first: false
      # Class handling label
      - VQATokenLabelEncode:
          contains_re: true
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: true
      # Transform with no parameters (null).
      - VQAReTokenRelation: null
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
      # Transform with no parameters (null).
      - TensorizeEntitiesRelations: null
      - Resize:
          size: [224, 224]
      - NormalizeImage:
          scale: 1
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          order: "hwc"
      # Transform with no parameters (null).
      - ToCHWImage: null
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - entities
            - relations
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 8
    num_workers: 8
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`Global:`
			`use_gpu: True`
add vqa code (#7096) * add vqa code * add order ocr info * rename tb-yx order * polish configs * add trt offline-tuning * fix seed and remove unused configs 2022-08-06 15:41:20 +08:00			`epoch_num: &epoch_num 130`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`log_smooth_window: 10`
			`print_batch_step: 10`
add vqa code (#7096) * add vqa code * add order ocr info * rename tb-yx order * polish configs * add trt offline-tuning * fix seed and remove unused configs 2022-08-06 15:41:20 +08:00			`save_model_dir: ./output/re_layoutxlm_xfund_zh`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`save_epoch_step: 2000`
			`# evaluation is run every 10 iterations after the 0th iteration`
move imgs to doc 2022-01-05 22:35:21 +08:00			`eval_batch_step: [ 0, 19 ]`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`cal_metric_during_train: False`
			`save_inference_dir:`
			`use_visualdl: False`
add seed 2022-01-07 12:56:45 +08:00			`seed: 2022`
polish kie doc and code (#7255) * add fapiao kie * fix readme * fix fanli * add readme * add how to do kie en * add algo kie * add algo overview en * rename vqa to kie * fix read gif 2022-08-21 10:55:49 +08:00			`infer_img: ppstructure/docs/kie/input/zh_val_21.jpg`
add vqa code (#7096) * add vqa code * add order ocr info * rename tb-yx order * polish configs * add trt offline-tuning * fix seed and remove unused configs 2022-08-06 15:41:20 +08:00			`save_res_path: ./output/re_layoutxlm_xfund_zh/res/`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00
			`Architecture:`
polish kie doc and code (#7255) * add fapiao kie * fix readme * fix fanli * add readme * add how to do kie en * add algo kie * add algo overview en * rename vqa to kie * fix read gif 2022-08-21 10:55:49 +08:00			`model_type: kie`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`algorithm: &algorithm "LayoutXLM"`
			`Transform:`
			`Backbone:`
			`name: LayoutXLMForRe`
add pretrained params to backbone 2022-01-06 11:35:30 +08:00			`pretrained: True`
add layoutlmv2 2022-02-12 15:17:38 +08:00			`checkpoints:`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00
			`Loss:`
			`name: LossFromOutput`
			`key: loss`
			`reduction: mean`

			`Optimizer:`
			`name: AdamW`
			`beta1: 0.9`
			`beta2: 0.999`
			`clip_norm: 10`
			`lr:`
add Const lr 2022-02-12 21:35:13 +08:00			`learning_rate: 0.00005`
			`warmup_epoch: 10`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`regularizer:`
rm const decay 2022-01-05 22:49:04 +08:00			`name: L2`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`factor: 0.00000`

			`PostProcess:`
			`name: VQAReTokenLayoutLMPostProcess`

			`Metric:`
			`name: VQAReTokenMetric`
			`main_indicator: hmean`

			`Train:`
			`dataset:`
			`name: SimpleDataSet`
			`data_dir: train_data/XFUND/zh_train/image`
			`label_file_list:`
add dygraph2static support of layoutlm series SER model 2022-07-01 16:52:08 +08:00			`- train_data/XFUND/zh_train/train.json`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`ratio_list: [ 1.0 ]`
			`transforms:`
			`- DecodeImage: # load image`
			`img_mode: RGB`
			`channel_first: False`
			`- VQATokenLabelEncode: # Class handling label`
			`contains_re: True`
			`algorithm: *algorithm`
add dygraph2static support of layoutlm series SER model 2022-07-01 16:52:08 +08:00			`class_path: &class_path train_data/XFUND/class_list_xfun.txt`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`- VQATokenPad:`
			`max_seq_len: &max_seq_len 512`
			`return_attention_mask: True`
			`- VQAReTokenRelation:`
			`- VQAReTokenChunk:`
			`max_seq_len: *max_seq_len`
add re predict 2022-09-20 22:13:27 +08:00			`- TensorizeEntitiesRelations:`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`- Resize:`
			`size: [224,224]`
			`- NormalizeImage:`
			`scale: 1`
			`mean: [ 123.675, 116.28, 103.53 ]`
			`std: [ 58.395, 57.12, 57.375 ]`
			`order: 'hwc'`
			`- ToCHWImage:`
			`- KeepKeys:`
add dygraph2static support of layoutlm series SER model 2022-07-01 16:52:08 +08:00			`keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`loader:`
			`shuffle: True`
			`drop_last: False`
add vqa code (#7096) * add vqa code * add order ocr info * rename tb-yx order * polish configs * add trt offline-tuning * fix seed and remove unused configs 2022-08-06 15:41:20 +08:00			`batch_size_per_card: 2`
add layoutlmv2 2022-02-12 15:17:38 +08:00			`num_workers: 8`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00
			`Eval:`
			`dataset:`
			`name: SimpleDataSet`
			`data_dir: train_data/XFUND/zh_val/image`
			`label_file_list:`
add dygraph2static support of layoutlm series SER model 2022-07-01 16:52:08 +08:00			`- train_data/XFUND/zh_val/val.json`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`transforms:`
			`- DecodeImage: # load image`
			`img_mode: RGB`
			`channel_first: False`
			`- VQATokenLabelEncode: # Class handling label`
			`contains_re: True`
			`algorithm: *algorithm`
			`class_path: *class_path`
			`- VQATokenPad:`
			`max_seq_len: *max_seq_len`
			`return_attention_mask: True`
			`- VQAReTokenRelation:`
			`- VQAReTokenChunk:`
			`max_seq_len: *max_seq_len`
add re predict 2022-09-20 22:13:27 +08:00			`- TensorizeEntitiesRelations:`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`- Resize:`
			`size: [224,224]`
			`- NormalizeImage:`
			`scale: 1`
			`mean: [ 123.675, 116.28, 103.53 ]`
			`std: [ 58.395, 57.12, 57.375 ]`
			`order: 'hwc'`
			`- ToCHWImage:`
			`- KeepKeys:`
add dygraph2static support of layoutlm series SER model 2022-07-01 16:52:08 +08:00			`keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order`
vqa code integrated into ppocr training system 2022-01-05 19:03:45 +08:00			`loader:`
			`shuffle: False`
			`drop_last: False`
			`batch_size_per_card: 8`
add layoutlmv2 2022-02-12 15:17:38 +08:00			`num_workers: 8`