commit cf31719c75

@@ -12,7 +12,7 @@ Global:
   checkpoints:
   save_inference_dir: ./output/SLANet/infer
   use_visualdl: False
-  infer_img: doc/table/table.jpg
+  infer_img: ppstructure/docs/table/table.jpg
   # for data or label process
   character_dict_path: ppocr/utils/dict/table_structure_dict.txt
   character_type: en

@@ -12,7 +12,7 @@ Global:
   checkpoints:
   save_inference_dir: ./output/SLANet_ch/infer
   use_visualdl: False
-  infer_img: doc/table/table.jpg
+  infer_img: ppstructure/docs/table/table.jpg
   # for data or label process
   character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
   character_type: en

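Both config hunks point Global.infer_img at the sample image's current location under ppstructure/docs/. A quick way to catch this kind of stale path is to resolve the Global entries against the repository checkout; the sketch below is illustrative only and not part of this commit (PyYAML is assumed to be installed, and the config file paths in the usage comment are assumptions).

# Hedged sketch: verify that file paths referenced in a config's Global section
# exist in the current checkout. The config paths in the usage comment are
# assumptions, not taken from this diff.
import os
import yaml

def check_global_paths(config_path):
    with open(config_path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    global_cfg = cfg.get("Global", {})
    for key in ("infer_img", "character_dict_path"):
        path = global_cfg.get(key)
        if path and not os.path.exists(path):
            print(f"{config_path}: Global.{key} -> {path} does not exist")

# Usage (paths assumed):
# check_global_paths("configs/table/SLANet.yml")
# check_global_paths("configs/table/SLANet_ch.yml")
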
@@ -166,6 +166,7 @@ class SLAHead(nn.Layer):
         self.max_text_length = max_text_length
         self.emb = self._char_to_onehot
         self.num_embeddings = out_channels
+        self.loc_reg_num = loc_reg_num

         # structure
         self.structure_attention_cell = AttentionGRUCell(

@@ -213,15 +214,17 @@ class SLAHead(nn.Layer):
         fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)

         hidden = paddle.zeros((batch_size, self.hidden_size))
-        structure_preds = []
-        loc_preds = []
+        structure_preds = paddle.zeros((batch_size, self.max_text_length + 1, self.num_embeddings))
+        loc_preds = paddle.zeros((batch_size, self.max_text_length + 1, self.loc_reg_num))
+        structure_preds.stop_gradient = True
+        loc_preds.stop_gradient = True
         if self.training and targets is not None:
             structure = targets[0]
             for i in range(self.max_text_length + 1):
                 hidden, structure_step, loc_step = self._decode(structure[:, i],
                                                                 fea, hidden)
-                structure_preds.append(structure_step)
-                loc_preds.append(loc_step)
+                structure_preds[:, i, :] = structure_step
+                loc_preds[:, i, :] = loc_step
         else:
             pre_chars = paddle.zeros(shape=[batch_size], dtype="int32")
             max_text_length = paddle.to_tensor(self.max_text_length)

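The decoder loop now writes each step into tensors preallocated with paddle.zeros, sized by max_text_length + 1, num_embeddings and the newly stored loc_reg_num, instead of appending to Python lists and stacking afterwards, so the output shapes are fixed before the loop runs. A minimal, self-contained sketch of the two patterns (shapes and values are made up for illustration; only the pattern mirrors the hunk above):

# Hedged sketch comparing list-append + stack with preallocation + slice assignment.
import paddle

batch, steps, dim = 2, 4, 3

# old pattern: collect per-step tensors, stack at the end
outs = []
for i in range(steps):
    outs.append(paddle.full((batch, dim), float(i)))
stacked = paddle.stack(outs, axis=1)      # result shape only exists after the loop

# new pattern: shape is fixed up front, each step writes into its own slot
prealloc = paddle.zeros((batch, steps, dim))
prealloc.stop_gradient = True             # mirrors the stop_gradient flag set above
for i in range(steps):
    prealloc[:, i, :] = paddle.full((batch, dim), float(i))

assert (stacked.numpy() == prealloc.numpy()).all()
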
@@ -231,10 +234,8 @@ class SLAHead(nn.Layer):
                 hidden, structure_step, loc_step = self._decode(pre_chars, fea,
                                                                 hidden)
                 pre_chars = structure_step.argmax(axis=1, dtype="int32")
-                structure_preds.append(structure_step)
-                loc_preds.append(loc_step)
-        structure_preds = paddle.stack(structure_preds, axis=1)
-        loc_preds = paddle.stack(loc_preds, axis=1)
+                structure_preds[:, i, :] = structure_step
+                loc_preds[:, i, :] = loc_step
         if not self.training:
             structure_preds = F.softmax(structure_preds)
         return {'structure_probs': structure_preds, 'loc_preds': loc_preds}

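With both branches writing into the same preallocated tensors, the paddle.stack calls after the loop are gone and the softmax and return dict operate on tensors whose shapes are known up front. One likely motivation, not stated in this diff and therefore an assumption, is friendliness to dynamic-to-static conversion and export; below is a toy sketch of exporting a small loop-based layer that uses the same write-into-a-preallocated-tensor pattern (the layer, shapes, and save path are made up):

# Toy export sketch (assumption: the preallocation pattern eases
# paddle.jit.to_static / paddle.jit.save export; TinyDecoder is illustrative only).
import paddle
from paddle.static import InputSpec

class TinyDecoder(paddle.nn.Layer):
    def __init__(self, steps=4, dim=8):
        super().__init__()
        self.steps = steps
        self.fc = paddle.nn.Linear(dim, dim)

    def forward(self, x):                        # x: [batch, dim]
        out = paddle.zeros((x.shape[0], self.steps, x.shape[1]))
        for i in range(self.steps):
            x = self.fc(x)
            out[:, i, :] = x                     # fixed-shape write per step
        return out

layer = TinyDecoder()
static_layer = paddle.jit.to_static(
    layer, input_spec=[InputSpec(shape=[1, 8], dtype="float32")])
paddle.jit.save(static_layer, "./tiny_decoder")  # writes tiny_decoder.pdmodel / .pdiparams
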
@@ -68,6 +68,7 @@ def build_pre_process_list(args):

 class TableStructurer(object):
     def __init__(self, args):
+        self.use_onnx = args.use_onnx
         pre_process_list = build_pre_process_list(args)
         if args.table_algorithm not in ['TableMaster']:
             postprocess_params = {

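The new self.use_onnx flag mirrors args.use_onnx and decides which inference backend the structurer talks to. The sketch below shows the kind of predictor setup such a flag typically selects between; it is illustrative only (the argument name table_model_dir and the file names are assumptions) and is not the helper used by this file.

# Hedged sketch of a use_onnx switch between ONNX Runtime and Paddle Inference.
def create_table_predictor(args):
    if args.use_onnx:
        import onnxruntime as ort
        sess = ort.InferenceSession(args.table_model_dir)   # path to a .onnx file (assumed)
        input_tensor = sess.get_inputs()[0]
        return sess, input_tensor, None                     # output handles not needed
    else:
        from paddle import inference
        config = inference.Config(args.table_model_dir + "/inference.pdmodel",
                                  args.table_model_dir + "/inference.pdiparams")
        predictor = inference.create_predictor(config)
        input_tensor = predictor.get_input_handle(predictor.get_input_names()[0])
        output_tensors = [predictor.get_output_handle(name)
                          for name in predictor.get_output_names()]
        return predictor, input_tensor, output_tensors
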
@@ -98,7 +99,11 @@ class TableStructurer(object):
             return None, 0
         img = np.expand_dims(img, axis=0)
         img = img.copy()

-        self.input_tensor.copy_from_cpu(img)
-        self.predictor.run()
-        outputs = []
+        if self.use_onnx:
+            input_dict = {}
+            input_dict[self.input_tensor.name] = img
+            outputs = self.predictor.run(self.output_tensors, input_dict)
+        else:
+            self.input_tensor.copy_from_cpu(img)
+            self.predictor.run()
+            outputs = []

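In the ONNX branch the predictor is an ONNX Runtime session, so the forward pass is a single run call with a feed dict keyed by the input name, and the returned list already holds NumPy arrays. A standalone equivalent of that branch, with the model path and input size as assumptions:

# Standalone ONNX Runtime sketch of the use_onnx branch (model path, input size,
# and output layout are assumptions, not taken from this diff).
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("inference/slanet.onnx")
img = np.random.rand(1, 3, 488, 488).astype("float32")      # NCHW dummy input
input_name = sess.get_inputs()[0].name
outputs = sess.run(None, {input_name: img})                  # None -> return all outputs
for out in outputs:
    print(out.shape)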