modified pr
parent
bbca1e0d66
commit
1cda437c4d
|
@ -11,6 +11,7 @@ inference/
|
|||
inference_results/
|
||||
output/
|
||||
train_data/
|
||||
log/
|
||||
*.DS_Store
|
||||
*.vs
|
||||
*.user
|
||||
|
|
|
@ -61,7 +61,6 @@ Loss:
|
|||
|
||||
PostProcess:
|
||||
name: SPINAttnLabelDecode
|
||||
character_dict_path: ./ppocr/utils/dict/spin_dict.txt
|
||||
use_space_char: False
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -12,6 +12,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
@ -19,6 +20,9 @@ from __future__ import print_function
|
|||
import paddle
|
||||
from paddle import nn
|
||||
|
||||
'''This code is referenced from:
|
||||
https://github.com/hikopensource/DAVAR-Lab-OCR
|
||||
'''
|
||||
|
||||
class SPINAttentionLoss(nn.Layer):
|
||||
def __init__(self, reduction='mean', ignore_index=-100, **kwargs):
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -80,98 +80,6 @@ class SPINAttentionHead(nn.Layer):
|
|||
return probs
|
||||
|
||||
|
||||
class AttentionGRUCell(nn.Layer):
    """Single decoding step of an additive-attention GRU.

    The cell scores every position of the feature sequence ``batch_H``
    against the previous hidden state, pools ``batch_H`` into one context
    vector using the softmax of those scores, and feeds the context
    concatenated with ``char_onehots`` to a GRU cell.

    Args:
        input_size: size of the last axis of ``batch_H``.
        hidden_size: size of the recurrent hidden state.
        num_embeddings: size of the last axis of ``char_onehots``.
        use_gru: accepted for signature compatibility; not read here.
    """

    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
        super(AttentionGRUCell, self).__init__()
        self.hidden_size = hidden_size

        # Projections used to build the additive attention energies.
        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1, bias_attr=False)

        # The recurrent cell consumes [context ; one-hot character].
        self.rnn = nn.GRUCell(
            input_size=input_size + num_embeddings,
            hidden_size=hidden_size)

    def forward(self, prev_hidden, batch_H, char_onehots):
        """Run one attention + GRU step.

        Args:
            prev_hidden: hidden state from the previous step.
            batch_H: feature sequence attended over
                (assumed [batch, steps, input_size] -- confirm with caller).
            char_onehots: one-hot vector of the previous character.

        Returns:
            tuple: ``(rnn_result, alpha)`` where ``rnn_result`` is whatever
            ``self.rnn`` returns, passed through unchanged, and ``alpha``
            holds the attention weights after the ``[0, 2, 1]`` transpose.
        """
        keys = self.i2h(batch_H)
        # Insert axis 1 so the projected state broadcasts over batch_H's axis 1.
        query = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
        energies = self.score(paddle.tanh(paddle.add(keys, query)))

        # Normalise over axis 1, then swap the last two axes so the weights
        # can left-multiply batch_H.
        alpha = paddle.transpose(F.softmax(energies, axis=1), [0, 2, 1])
        pooled = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)

        rnn_input = paddle.concat([pooled, char_onehots], 1)
        return self.rnn(rnn_input, prev_hidden), alpha
|
||||
|
||||
|
||||
class AttentionLSTM(nn.Layer):
    """Attention decoder head that unrolls an ``AttentionLSTMCell``.

    Args:
        in_channels: feature width handed to the attention cell.
        out_channels: number of output classes; also the one-hot width.
        hidden_size: width of both LSTM state tensors.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
        super(AttentionLSTM, self).__init__()
        self.input_size = in_channels
        self.hidden_size = hidden_size
        self.num_classes = out_channels

        self.attention_cell = AttentionLSTMCell(
            in_channels, hidden_size, out_channels, use_gru=False)
        self.generator = nn.Linear(hidden_size, out_channels)

    def _char_to_onehot(self, input_char, onehot_dim):
        """Return the one-hot encoding of ``input_char`` with ``onehot_dim`` classes."""
        return F.one_hot(input_char, onehot_dim)

    def forward(self, inputs, targets=None, batch_max_length=25):
        """Decode ``batch_max_length`` steps from ``inputs``.

        When ``targets`` is given, step ``i`` is conditioned on
        ``targets[:, i]``. Otherwise decoding starts from class index 0 and
        each step is conditioned on the argmax of the previous step's output.

        Args:
            inputs: feature sequence; axis 0 is read as the batch size.
            targets: optional ground-truth indices, indexed as ``[:, i]``.
            batch_max_length: number of decoding steps.

        Returns:
            Outputs of ``self.generator`` stacked along axis 1. No softmax is
            applied in this method. Without ``targets`` and with zero steps
            the result is ``None``.
        """
        batch = inputs.shape[0]
        steps = batch_max_length

        # Both LSTM state tensors start at zero.
        state = (paddle.zeros((batch, self.hidden_size)),
                 paddle.zeros((batch, self.hidden_size)))
        step_hiddens = []

        if targets is None:
            # Greedy decoding: feed back the arg-max of each step.
            targets = paddle.zeros(shape=[batch], dtype="int32")
            probs = None

            for _ in range(steps):
                onehots = self._char_to_onehot(
                    targets, onehot_dim=self.num_classes)
                cell_out, _ = self.attention_cell(state, inputs, onehots)

                step_logits = self.generator(cell_out[0])
                state = (cell_out[1][0], cell_out[1][1])

                expanded = paddle.unsqueeze(step_logits, axis=1)
                if probs is None:
                    probs = expanded
                else:
                    probs = paddle.concat([probs, expanded], axis=1)

                targets = step_logits.argmax(axis=1)

            return probs

        # Teacher forcing: condition each step on the ground-truth character.
        for idx in range(steps):
            onehots = self._char_to_onehot(
                targets[:, idx], onehot_dim=self.num_classes)
            cell_out, _ = self.attention_cell(state, inputs, onehots)

            state = (cell_out[1][0], cell_out[1][1])
            step_hiddens.append(paddle.unsqueeze(state[0], axis=1))

        # Project all collected hidden states in a single generator call.
        stacked = paddle.concat(step_hiddens, axis=1)
        return self.generator(stacked)
|
||||
|
||||
|
||||
class AttentionLSTMCell(nn.Layer):
|
||||
def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
|
||||
super(AttentionLSTMCell, self).__init__()
|
||||
|
|
|
@ -70,17 +70,6 @@ class BidirectionalLSTM(nn.Layer):
|
|||
self.linear = nn.Linear(hidden_size * 2, output_size)
|
||||
|
||||
def forward(self, input_feature):
|
||||
"""
|
||||
|
||||
Args:
|
||||
input_feature (Torch.Tensor): visual feature [batch_size x T x input_size]
|
||||
|
||||
Returns:
|
||||
Torch.Tensor: LSTM output contextual feature [batch_size x T x output_size]
|
||||
|
||||
"""
|
||||
|
||||
# self.rnn.flatten_parameters() # error in export_model
|
||||
recurrent, _ = self.rnn(input_feature) # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
|
||||
if self.with_linear:
|
||||
output = self.linear(recurrent) # batch_size x T x output_size
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -71,14 +71,14 @@ class SP_TransformerNetwork(nn.Layer):
|
|||
"""
|
||||
|
||||
Args:
|
||||
batch_I (torch.Tensor): batch of input images [batch_size x nc x I_height x I_width]
|
||||
batch_I (Tensor): batch of input images [batch_size x nc x I_height x I_width]
|
||||
weights:
|
||||
offsets: the predicted offset by AIN, a scalar
|
||||
lambda_color: the learnable update gate \alpha in Equa. (5) as
|
||||
g(x) = (1 - \alpha) \odot x + \alpha \odot x_{offsets}
|
||||
|
||||
Returns:
|
||||
torch.Tensor: transformed images by SPN as Equa. (4) in Ref. [1]
|
||||
Tensor: transformed images by SPN as Equa. (4) in Ref. [1]
|
||||
[batch_size x I_channel_num x I_r_height x I_r_width]
|
||||
|
||||
"""
|
||||
|
@ -114,8 +114,6 @@ class GA_SPIN_Transformer(nn.Layer):
|
|||
in_channels (int): channel of input features,
|
||||
set it to 1 for grayscale images and 3 for RGB input
|
||||
I_r_size (tuple): size of rectified images (used in STN transformations)
|
||||
inputDataType (str): the type of input data,
|
||||
only support 'torch.cuda.FloatTensor' this version
|
||||
offsets (bool): set it to False to use SPN without AIN,
|
||||
and set it to True to use SPIN (both SPN and AIN)
|
||||
norm_type (str): the normalization type of the module,
|
||||
|
@ -123,6 +121,7 @@ class GA_SPIN_Transformer(nn.Layer):
|
|||
default_type (int): the K chromatic space,
|
||||
set it to 3/5/6 depending on the complexity of transformation intensities
|
||||
loc_lr (float): learning rate of location network
|
||||
stn (bool): whether to use stn.
|
||||
|
||||
"""
|
||||
super(GA_SPIN_Transformer, self).__init__()
|
||||
|
@ -233,12 +232,12 @@ class GA_SPIN_Transformer(nn.Layer):
|
|||
def forward(self, x, return_weight=False):
|
||||
"""
|
||||
Args:
|
||||
x (torch.cuda.FloatTensor): input image batch
|
||||
x (Tensor): input image batch
|
||||
return_weight (bool): set to False by default,
|
||||
if set to True return the predicted offsets of AIN, denoted as x_{offsets}
|
||||
|
||||
Returns:
|
||||
torch.Tensor: rectified image [batch_size x I_channel_num x I_height x I_width], the same as the input size
|
||||
Tensor: rectified image [batch_size x I_channel_num x I_height x I_width], the same as the input size
|
||||
"""
|
||||
|
||||
if self.spt:
|
||||
|
|
|
@ -73,12 +73,6 @@ def export_single_model(model, arch_config, save_path, logger, quanter=None):
|
|||
shape=[None, 3, 64, 512], dtype="float32"),
|
||||
]
|
||||
model = to_static(model, input_spec=other_shape)
|
||||
elif arch_config["algorithm"] == "SPIN":
|
||||
other_shape = [
|
||||
paddle.static.InputSpec(
|
||||
shape=[None, 1, 32, 100], dtype="float32"),
|
||||
]
|
||||
model = to_static(model, input_spec=other_shape)
|
||||
else:
|
||||
infer_shape = [3, -1, -1]
|
||||
if arch_config["model_type"] == "rec":
|
||||
|
|
Loading…
Reference in New Issue