From 6d2bc9f573736a54965b0b656db9fd8d12a32726 Mon Sep 17 00:00:00 2001 From: liuhongen1234567 <65936492+liuhongen1234567@users.noreply.github.com> Date: Mon, 2 Dec 2024 20:03:12 +0800 Subject: [PATCH] add d2s_train_image_shape for static train (#14312) --- configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml | 1 + configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml | 2 +- configs/rec/SVTRv2/rec_repsvtr_ch.yml | 1 + configs/rec/SVTRv2/rec_svtrv2_ch.yml | 2 +- configs/rec/rec_latex_ocr.yml | 1 + configs/table/SLANet_ch.yml | 1 + ppocr/modeling/heads/rec_nrtr_head.py | 3 ++- 7 files changed, 8 insertions(+), 3 deletions(-) diff --git a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml index 827f5eef3..8bdb819b5 100644 --- a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml +++ b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml @@ -19,6 +19,7 @@ Global: use_space_char: true distributed: true save_res_path: ./output/rec/predicts_ppocrv3.txt + d2s_train_image_shape: [3, 48, 320] Optimizer: diff --git a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml index dba966e7c..7316dbeba 100644 --- a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml +++ b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml @@ -19,7 +19,7 @@ Global: use_space_char: true distributed: true save_res_path: ./output/rec/predicts_ppocrv3.txt - + d2s_train_image_shape: [3, 48, 320] Optimizer: name: Adam diff --git a/configs/rec/SVTRv2/rec_repsvtr_ch.yml b/configs/rec/SVTRv2/rec_repsvtr_ch.yml index 4fd643bc9..d1f692350 100644 --- a/configs/rec/SVTRv2/rec_repsvtr_ch.yml +++ b/configs/rec/SVTRv2/rec_repsvtr_ch.yml @@ -19,6 +19,7 @@ Global: use_space_char: true distributed: true save_res_path: ./output/rec/predicts_repsvtr.txt + d2s_train_image_shape: [3, 48, 320] Optimizer: name: AdamW diff --git a/configs/rec/SVTRv2/rec_svtrv2_ch.yml b/configs/rec/SVTRv2/rec_svtrv2_ch.yml index 70efe1093..e954ec904 100644 --- a/configs/rec/SVTRv2/rec_svtrv2_ch.yml +++ b/configs/rec/SVTRv2/rec_svtrv2_ch.yml @@ -19,7 +19,7 @@ Global: use_space_char: true distributed: true save_res_path: ./output/rec/predicts_svrtv2.txt - + d2s_train_image_shape: [3, 48, 320] Optimizer: name: AdamW diff --git a/configs/rec/rec_latex_ocr.yml b/configs/rec/rec_latex_ocr.yml index 2c604bf81..f87a10848 100644 --- a/configs/rec/rec_latex_ocr.yml +++ b/configs/rec/rec_latex_ocr.yml @@ -18,6 +18,7 @@ Global: use_space_char: False rec_char_dict_path: ppocr/utils/dict/latex_ocr_tokenizer.json save_res_path: ./output/rec/predicts_latexocr.txt + d2s_train_image_shape: [1,256,256] Optimizer: name: AdamW diff --git a/configs/table/SLANet_ch.yml b/configs/table/SLANet_ch.yml index c16f7efed..70ea66970 100644 --- a/configs/table/SLANet_ch.yml +++ b/configs/table/SLANet_ch.yml @@ -21,6 +21,7 @@ Global: infer_mode: False use_sync_bn: True save_res_path: output/infer + d2s_train_image_shape: [3, 488, 488] Optimizer: name: Adam diff --git a/ppocr/modeling/heads/rec_nrtr_head.py b/ppocr/modeling/heads/rec_nrtr_head.py index ad01438be..423d5a11a 100644 --- a/ppocr/modeling/heads/rec_nrtr_head.py +++ b/ppocr/modeling/heads/rec_nrtr_head.py @@ -355,7 +355,8 @@ class Transformer(nn.Layer): """ mask = paddle.zeros([sz, sz], dtype="float32") mask_inf = paddle.triu( - paddle.full(shape=[sz, sz], dtype="float32", fill_value="-inf"), diagonal=1 + paddle.full(shape=[sz, sz], dtype="float32", fill_value=float("-inf")), + diagonal=1, ) mask = mask + mask_inf return mask.unsqueeze([0, 1])