mirror of
https://github.com/PaddlePaddle/PaddleOCR.git
synced 2025-06-03 21:53:39 +08:00
add support for svtr static training (#6328)
This commit is contained in:
parent
8077020e86
commit
2e80aab477
@ -40,11 +40,29 @@ def apply_to_static(model, config, logger):
|
|||||||
return model
|
return model
|
||||||
assert "image_shape" in config[
|
assert "image_shape" in config[
|
||||||
"Global"], "image_shape must be assigned for static training mode..."
|
"Global"], "image_shape must be assigned for static training mode..."
|
||||||
supported_list = ["DB"]
|
supported_list = ["DB", "SVTR"]
|
||||||
assert config["Architecture"][
|
if config["Architecture"]["algorithm"] in ["Distillation"]:
|
||||||
"algorithm"] in supported_list, f"algorithms that supports static training must in in {supported_list} but got {config['Architecture']['algorithm']}"
|
algo = list(config["Architecture"]["Models"].values())[0]["algorithm"]
|
||||||
|
else:
|
||||||
|
algo = config["Architecture"]["algorithm"]
|
||||||
|
assert algo in supported_list, f"algorithms that supports static training must in in {supported_list} but got {algo}"
|
||||||
|
|
||||||
|
specs = [
|
||||||
|
InputSpec(
|
||||||
|
[None] + config["Global"]["image_shape"], dtype='float32')
|
||||||
|
]
|
||||||
|
|
||||||
|
if algo == "SVTR":
|
||||||
|
specs.append([
|
||||||
|
InputSpec(
|
||||||
|
[None, config["Global"]["max_text_length"]],
|
||||||
|
dtype='int64'), InputSpec(
|
||||||
|
[None, config["Global"]["max_text_length"]], dtype='int64'),
|
||||||
|
InputSpec(
|
||||||
|
[None], dtype='int64'), InputSpec(
|
||||||
|
[None], dtype='float64')
|
||||||
|
])
|
||||||
|
|
||||||
specs = [InputSpec([None] + config["Global"]["image_shape"])]
|
|
||||||
model = to_static(model, input_spec=specs)
|
model = to_static(model, input_spec=specs)
|
||||||
logger.info("Successfully to apply @to_static with specs: {}".format(specs))
|
logger.info("Successfully to apply @to_static with specs: {}".format(specs))
|
||||||
return model
|
return model
|
||||||
|
@ -83,7 +83,7 @@ class SAREncoder(nn.Layer):
|
|||||||
|
|
||||||
def forward(self, feat, img_metas=None):
|
def forward(self, feat, img_metas=None):
|
||||||
if img_metas is not None:
|
if img_metas is not None:
|
||||||
assert len(img_metas[0]) == feat.shape[0]
|
assert len(img_metas[0]) == paddle.shape(feat)[0]
|
||||||
|
|
||||||
valid_ratios = None
|
valid_ratios = None
|
||||||
if img_metas is not None and self.mask:
|
if img_metas is not None and self.mask:
|
||||||
@ -98,9 +98,10 @@ class SAREncoder(nn.Layer):
|
|||||||
|
|
||||||
if valid_ratios is not None:
|
if valid_ratios is not None:
|
||||||
valid_hf = []
|
valid_hf = []
|
||||||
T = holistic_feat.shape[1]
|
T = paddle.shape(holistic_feat)[1]
|
||||||
for i in range(len(valid_ratios)):
|
for i in range(paddle.shape(valid_ratios)[0]):
|
||||||
valid_step = min(T, math.ceil(T * valid_ratios[i])) - 1
|
valid_step = paddle.minimum(
|
||||||
|
T, paddle.ceil(valid_ratios[i] * T).astype('int32')) - 1
|
||||||
valid_hf.append(holistic_feat[i, valid_step, :])
|
valid_hf.append(holistic_feat[i, valid_step, :])
|
||||||
valid_hf = paddle.stack(valid_hf, axis=0)
|
valid_hf = paddle.stack(valid_hf, axis=0)
|
||||||
else:
|
else:
|
||||||
@ -247,13 +248,14 @@ class ParallelSARDecoder(BaseDecoder):
|
|||||||
# bsz * (seq_len + 1) * h * w * attn_size
|
# bsz * (seq_len + 1) * h * w * attn_size
|
||||||
attn_weight = self.conv1x1_2(attn_weight)
|
attn_weight = self.conv1x1_2(attn_weight)
|
||||||
# bsz * (seq_len + 1) * h * w * 1
|
# bsz * (seq_len + 1) * h * w * 1
|
||||||
bsz, T, h, w, c = attn_weight.shape
|
bsz, T, h, w, c = paddle.shape(attn_weight)
|
||||||
assert c == 1
|
assert c == 1
|
||||||
|
|
||||||
if valid_ratios is not None:
|
if valid_ratios is not None:
|
||||||
# cal mask of attention weight
|
# cal mask of attention weight
|
||||||
for i in range(len(valid_ratios)):
|
for i in range(paddle.shape(valid_ratios)[0]):
|
||||||
valid_width = min(w, math.ceil(w * valid_ratios[i]))
|
valid_width = paddle.minimum(
|
||||||
|
w, paddle.ceil(valid_ratios[i] * w).astype("int32"))
|
||||||
if valid_width < w:
|
if valid_width < w:
|
||||||
attn_weight[i, :, :, valid_width:, :] = float('-inf')
|
attn_weight[i, :, :, valid_width:, :] = float('-inf')
|
||||||
|
|
||||||
@ -288,7 +290,7 @@ class ParallelSARDecoder(BaseDecoder):
|
|||||||
img_metas: [label, valid_ratio]
|
img_metas: [label, valid_ratio]
|
||||||
'''
|
'''
|
||||||
if img_metas is not None:
|
if img_metas is not None:
|
||||||
assert len(img_metas[0]) == feat.shape[0]
|
assert paddle.shape(img_metas[0])[0] == paddle.shape(feat)[0]
|
||||||
|
|
||||||
valid_ratios = None
|
valid_ratios = None
|
||||||
if img_metas is not None and self.mask:
|
if img_metas is not None and self.mask:
|
||||||
@ -302,7 +304,6 @@ class ParallelSARDecoder(BaseDecoder):
|
|||||||
# bsz * (seq_len + 1) * C
|
# bsz * (seq_len + 1) * C
|
||||||
out_dec = self._2d_attention(
|
out_dec = self._2d_attention(
|
||||||
in_dec, feat, out_enc, valid_ratios=valid_ratios)
|
in_dec, feat, out_enc, valid_ratios=valid_ratios)
|
||||||
# bsz * (seq_len + 1) * num_classes
|
|
||||||
|
|
||||||
return out_dec[:, 1:, :] # bsz * seq_len * num_classes
|
return out_dec[:, 1:, :] # bsz * seq_len * num_classes
|
||||||
|
|
||||||
@ -395,7 +396,6 @@ class SARHead(nn.Layer):
|
|||||||
|
|
||||||
if self.training:
|
if self.training:
|
||||||
label = targets[0] # label
|
label = targets[0] # label
|
||||||
label = paddle.to_tensor(label, dtype='int64')
|
|
||||||
final_out = self.decoder(
|
final_out = self.decoder(
|
||||||
feat, holistic_feat, label, img_metas=targets)
|
feat, holistic_feat, label, img_metas=targets)
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user