[TIPC]update tipc scripts and rm fluid api (#11098)
* [TIPC]update xpu tipc script
* update tipc scripts and remove fluid api
pull/11144/head
parent
673c95262b
commit
344b7594e4
|
@ -11,7 +11,6 @@
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This code is refer from:
|
This code is refer from:
|
||||||
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/encoders/channel_reduction_encoder.py
|
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/encoders/channel_reduction_encoder.py
|
||||||
|
@ -28,6 +27,7 @@ from paddle import ParamAttr
|
||||||
import paddle.nn as nn
|
import paddle.nn as nn
|
||||||
import paddle.nn.functional as F
|
import paddle.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
class BaseDecoder(nn.Layer):
|
class BaseDecoder(nn.Layer):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
@ -48,9 +48,11 @@ class BaseDecoder(nn.Layer):
|
||||||
self.train_mode = train_mode
|
self.train_mode = train_mode
|
||||||
|
|
||||||
if train_mode:
|
if train_mode:
|
||||||
return self.forward_train(feat, out_enc, label, valid_ratios, word_positions)
|
return self.forward_train(feat, out_enc, label, valid_ratios,
|
||||||
|
word_positions)
|
||||||
return self.forward_test(feat, out_enc, valid_ratios, word_positions)
|
return self.forward_test(feat, out_enc, valid_ratios, word_positions)
|
||||||
|
|
||||||
|
|
||||||
class ChannelReductionEncoder(nn.Layer):
|
class ChannelReductionEncoder(nn.Layer):
|
||||||
"""Change the channel number with a one by one convoluational layer.
|
"""Change the channel number with a one by one convoluational layer.
|
||||||
|
|
||||||
|
@ -59,14 +61,16 @@ class ChannelReductionEncoder(nn.Layer):
|
||||||
out_channels (int): Number of output channels.
|
out_channels (int): Number of output channels.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self, in_channels, out_channels, **kwargs):
|
||||||
in_channels,
|
|
||||||
out_channels,
|
|
||||||
**kwargs):
|
|
||||||
super(ChannelReductionEncoder, self).__init__()
|
super(ChannelReductionEncoder, self).__init__()
|
||||||
|
|
||||||
self.layer = nn.Conv2D(
|
self.layer = nn.Conv2D(
|
||||||
in_channels, out_channels, kernel_size=1, stride=1, padding=0, weight_attr=nn.initializer.XavierNormal())
|
in_channels,
|
||||||
|
out_channels,
|
||||||
|
kernel_size=1,
|
||||||
|
stride=1,
|
||||||
|
padding=0,
|
||||||
|
weight_attr=nn.initializer.XavierNormal())
|
||||||
|
|
||||||
def forward(self, feat):
|
def forward(self, feat):
|
||||||
"""
|
"""
|
||||||
|
@ -84,8 +88,8 @@ def masked_fill(x, mask, value):
|
||||||
y = paddle.full(x.shape, value, x.dtype)
|
y = paddle.full(x.shape, value, x.dtype)
|
||||||
return paddle.where(mask, y, x)
|
return paddle.where(mask, y, x)
|
||||||
|
|
||||||
class DotProductAttentionLayer(nn.Layer):
|
|
||||||
|
|
||||||
|
class DotProductAttentionLayer(nn.Layer):
|
||||||
def __init__(self, dim_model=None):
|
def __init__(self, dim_model=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
@ -99,7 +103,7 @@ class DotProductAttentionLayer(nn.Layer):
|
||||||
logits = paddle.reshape(logits, [n, c, h, w])
|
logits = paddle.reshape(logits, [n, c, h, w])
|
||||||
if valid_ratios is not None:
|
if valid_ratios is not None:
|
||||||
# cal mask of attention weight
|
# cal mask of attention weight
|
||||||
with paddle.fluid.framework._stride_in_no_check_dy2st_diff():
|
with paddle.base.framework._stride_in_no_check_dy2st_diff():
|
||||||
for i, valid_ratio in enumerate(valid_ratios):
|
for i, valid_ratio in enumerate(valid_ratios):
|
||||||
valid_width = min(w, int(w * valid_ratio + 0.5))
|
valid_width = min(w, int(w * valid_ratio + 0.5))
|
||||||
if valid_width < w:
|
if valid_width < w:
|
||||||
|
@ -113,6 +117,7 @@ class DotProductAttentionLayer(nn.Layer):
|
||||||
glimpse = paddle.transpose(glimpse, (0, 2, 1))
|
glimpse = paddle.transpose(glimpse, (0, 2, 1))
|
||||||
return glimpse
|
return glimpse
|
||||||
|
|
||||||
|
|
||||||
class SequenceAttentionDecoder(BaseDecoder):
|
class SequenceAttentionDecoder(BaseDecoder):
|
||||||
"""Sequence attention decoder for RobustScanner.
|
"""Sequence attention decoder for RobustScanner.
|
||||||
|
|
||||||
|
@ -181,8 +186,8 @@ class SequenceAttentionDecoder(BaseDecoder):
|
||||||
self.prediction = None
|
self.prediction = None
|
||||||
if not self.return_feature:
|
if not self.return_feature:
|
||||||
pred_num_classes = num_classes - 1
|
pred_num_classes = num_classes - 1
|
||||||
self.prediction = nn.Linear(
|
self.prediction = nn.Linear(dim_model if encode_value else
|
||||||
dim_model if encode_value else dim_input, pred_num_classes)
|
dim_input, pred_num_classes)
|
||||||
|
|
||||||
def forward_train(self, feat, out_enc, targets, valid_ratios):
|
def forward_train(self, feat, out_enc, targets, valid_ratios):
|
||||||
"""
|
"""
|
||||||
|
@ -243,12 +248,13 @@ class SequenceAttentionDecoder(BaseDecoder):
|
||||||
seq_len = self.max_seq_len
|
seq_len = self.max_seq_len
|
||||||
batch_size = feat.shape[0]
|
batch_size = feat.shape[0]
|
||||||
|
|
||||||
decode_sequence = (paddle.ones((batch_size, seq_len), dtype='int64') * self.start_idx)
|
decode_sequence = (paddle.ones(
|
||||||
|
(batch_size, seq_len), dtype='int64') * self.start_idx)
|
||||||
|
|
||||||
outputs = []
|
outputs = []
|
||||||
for i in range(seq_len):
|
for i in range(seq_len):
|
||||||
step_out = self.forward_test_step(feat, out_enc, decode_sequence,
|
step_out = self.forward_test_step(feat, out_enc, decode_sequence, i,
|
||||||
i, valid_ratios)
|
valid_ratios)
|
||||||
outputs.append(step_out)
|
outputs.append(step_out)
|
||||||
max_idx = paddle.argmax(step_out, axis=1, keepdim=False)
|
max_idx = paddle.argmax(step_out, axis=1, keepdim=False)
|
||||||
if i < seq_len - 1:
|
if i < seq_len - 1:
|
||||||
|
@ -306,7 +312,6 @@ class SequenceAttentionDecoder(BaseDecoder):
|
||||||
|
|
||||||
|
|
||||||
class PositionAwareLayer(nn.Layer):
|
class PositionAwareLayer(nn.Layer):
|
||||||
|
|
||||||
def __init__(self, dim_model, rnn_layers=2):
|
def __init__(self, dim_model, rnn_layers=2):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
@ -384,16 +389,16 @@ class PositionAttentionDecoder(BaseDecoder):
|
||||||
|
|
||||||
self.embedding = nn.Embedding(self.max_seq_len + 1, self.dim_model)
|
self.embedding = nn.Embedding(self.max_seq_len + 1, self.dim_model)
|
||||||
|
|
||||||
self.position_aware_module = PositionAwareLayer(
|
self.position_aware_module = PositionAwareLayer(self.dim_model,
|
||||||
self.dim_model, rnn_layers)
|
rnn_layers)
|
||||||
|
|
||||||
self.attention_layer = DotProductAttentionLayer()
|
self.attention_layer = DotProductAttentionLayer()
|
||||||
|
|
||||||
self.prediction = None
|
self.prediction = None
|
||||||
if not self.return_feature:
|
if not self.return_feature:
|
||||||
pred_num_classes = num_classes - 1
|
pred_num_classes = num_classes - 1
|
||||||
self.prediction = nn.Linear(
|
self.prediction = nn.Linear(dim_model if encode_value else
|
||||||
dim_model if encode_value else dim_input, pred_num_classes)
|
dim_input, pred_num_classes)
|
||||||
|
|
||||||
def _get_position_index(self, length, batch_size):
|
def _get_position_index(self, length, batch_size):
|
||||||
position_index_list = []
|
position_index_list = []
|
||||||
|
@ -403,7 +408,8 @@ class PositionAttentionDecoder(BaseDecoder):
|
||||||
batch_position_index = paddle.stack(position_index_list, axis=0)
|
batch_position_index = paddle.stack(position_index_list, axis=0)
|
||||||
return batch_position_index
|
return batch_position_index
|
||||||
|
|
||||||
def forward_train(self, feat, out_enc, targets, valid_ratios, position_index):
|
def forward_train(self, feat, out_enc, targets, valid_ratios,
|
||||||
|
position_index):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
feat (Tensor): Tensor of shape :math:`(N, D_i, H, W)`.
|
feat (Tensor): Tensor of shape :math:`(N, D_i, H, W)`.
|
||||||
|
@ -434,9 +440,9 @@ class PositionAttentionDecoder(BaseDecoder):
|
||||||
query = paddle.transpose(query, (0, 2, 1))
|
query = paddle.transpose(query, (0, 2, 1))
|
||||||
key = paddle.reshape(position_out_enc, (n, c_enc, h * w))
|
key = paddle.reshape(position_out_enc, (n, c_enc, h * w))
|
||||||
if self.encode_value:
|
if self.encode_value:
|
||||||
value = paddle.reshape(out_enc,(n, c_enc, h * w))
|
value = paddle.reshape(out_enc, (n, c_enc, h * w))
|
||||||
else:
|
else:
|
||||||
value = paddle.reshape(feat,(n, c_feat, h * w))
|
value = paddle.reshape(feat, (n, c_feat, h * w))
|
||||||
|
|
||||||
attn_out = self.attention_layer(query, key, value, h, w, valid_ratios)
|
attn_out = self.attention_layer(query, key, value, h, w, valid_ratios)
|
||||||
attn_out = paddle.transpose(attn_out, (0, 2, 1)) # [n, len_q, dim_v]
|
attn_out = paddle.transpose(attn_out, (0, 2, 1)) # [n, len_q, dim_v]
|
||||||
|
@ -472,9 +478,9 @@ class PositionAttentionDecoder(BaseDecoder):
|
||||||
query = paddle.transpose(query, (0, 2, 1))
|
query = paddle.transpose(query, (0, 2, 1))
|
||||||
key = paddle.reshape(position_out_enc, (n, c_enc, h * w))
|
key = paddle.reshape(position_out_enc, (n, c_enc, h * w))
|
||||||
if self.encode_value:
|
if self.encode_value:
|
||||||
value = paddle.reshape(out_enc,(n, c_enc, h * w))
|
value = paddle.reshape(out_enc, (n, c_enc, h * w))
|
||||||
else:
|
else:
|
||||||
value = paddle.reshape(feat,(n, c_feat, h * w))
|
value = paddle.reshape(feat, (n, c_feat, h * w))
|
||||||
|
|
||||||
attn_out = self.attention_layer(query, key, value, h, w, valid_ratios)
|
attn_out = self.attention_layer(query, key, value, h, w, valid_ratios)
|
||||||
attn_out = paddle.transpose(attn_out, (0, 2, 1)) # [n, len_q, dim_v]
|
attn_out = paddle.transpose(attn_out, (0, 2, 1)) # [n, len_q, dim_v]
|
||||||
|
@ -484,8 +490,8 @@ class PositionAttentionDecoder(BaseDecoder):
|
||||||
|
|
||||||
return self.prediction(attn_out)
|
return self.prediction(attn_out)
|
||||||
|
|
||||||
class RobustScannerFusionLayer(nn.Layer):
|
|
||||||
|
|
||||||
|
class RobustScannerFusionLayer(nn.Layer):
|
||||||
def __init__(self, dim_model, dim=-1):
|
def __init__(self, dim_model, dim=-1):
|
||||||
super(RobustScannerFusionLayer, self).__init__()
|
super(RobustScannerFusionLayer, self).__init__()
|
||||||
|
|
||||||
|
@ -500,6 +506,7 @@ class RobustScannerFusionLayer(nn.Layer):
|
||||||
output = F.glu(output, self.dim)
|
output = F.glu(output, self.dim)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
class RobustScannerDecoder(BaseDecoder):
|
class RobustScannerDecoder(BaseDecoder):
|
||||||
"""Decoder for RobustScanner.
|
"""Decoder for RobustScanner.
|
||||||
|
|
||||||
|
@ -561,8 +568,7 @@ class RobustScannerDecoder(BaseDecoder):
|
||||||
padding_idx=padding_idx,
|
padding_idx=padding_idx,
|
||||||
dropout=hybrid_decoder_dropout,
|
dropout=hybrid_decoder_dropout,
|
||||||
encode_value=encode_value,
|
encode_value=encode_value,
|
||||||
return_feature=True
|
return_feature=True)
|
||||||
)
|
|
||||||
|
|
||||||
# init position decoder
|
# init position decoder
|
||||||
self.position_decoder = PositionAttentionDecoder(
|
self.position_decoder = PositionAttentionDecoder(
|
||||||
|
@ -573,9 +579,7 @@ class RobustScannerDecoder(BaseDecoder):
|
||||||
max_seq_len=max_seq_len,
|
max_seq_len=max_seq_len,
|
||||||
mask=mask,
|
mask=mask,
|
||||||
encode_value=encode_value,
|
encode_value=encode_value,
|
||||||
return_feature=True
|
return_feature=True)
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
self.fusion_module = RobustScannerFusionLayer(
|
self.fusion_module = RobustScannerFusionLayer(
|
||||||
self.dim_model if encode_value else dim_input)
|
self.dim_model if encode_value else dim_input)
|
||||||
|
@ -584,7 +588,8 @@ class RobustScannerDecoder(BaseDecoder):
|
||||||
self.prediction = nn.Linear(dim_model if encode_value else dim_input,
|
self.prediction = nn.Linear(dim_model if encode_value else dim_input,
|
||||||
pred_num_classes)
|
pred_num_classes)
|
||||||
|
|
||||||
def forward_train(self, feat, out_enc, target, valid_ratios, word_positions):
|
def forward_train(self, feat, out_enc, target, valid_ratios,
|
||||||
|
word_positions):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
feat (Tensor): Tensor of shape :math:`(N, D_i, H, W)`.
|
feat (Tensor): Tensor of shape :math:`(N, D_i, H, W)`.
|
||||||
|
@ -599,8 +604,8 @@ class RobustScannerDecoder(BaseDecoder):
|
||||||
Returns:
|
Returns:
|
||||||
Tensor: A raw logit tensor of shape :math:`(N, T, C-1)`.
|
Tensor: A raw logit tensor of shape :math:`(N, T, C-1)`.
|
||||||
"""
|
"""
|
||||||
hybrid_glimpse = self.hybrid_decoder.forward_train(
|
hybrid_glimpse = self.hybrid_decoder.forward_train(feat, out_enc,
|
||||||
feat, out_enc, target, valid_ratios)
|
target, valid_ratios)
|
||||||
position_glimpse = self.position_decoder.forward_train(
|
position_glimpse = self.position_decoder.forward_train(
|
||||||
feat, out_enc, target, valid_ratios, word_positions)
|
feat, out_enc, target, valid_ratios, word_positions)
|
||||||
|
|
||||||
|
@ -625,7 +630,8 @@ class RobustScannerDecoder(BaseDecoder):
|
||||||
seq_len = self.max_seq_len
|
seq_len = self.max_seq_len
|
||||||
batch_size = feat.shape[0]
|
batch_size = feat.shape[0]
|
||||||
|
|
||||||
decode_sequence = (paddle.ones((batch_size, seq_len), dtype='int64') * self.start_idx)
|
decode_sequence = (paddle.ones(
|
||||||
|
(batch_size, seq_len), dtype='int64') * self.start_idx)
|
||||||
|
|
||||||
position_glimpse = self.position_decoder.forward_test(
|
position_glimpse = self.position_decoder.forward_test(
|
||||||
feat, out_enc, valid_ratios, word_positions)
|
feat, out_enc, valid_ratios, word_positions)
|
||||||
|
@ -649,8 +655,10 @@ class RobustScannerDecoder(BaseDecoder):
|
||||||
|
|
||||||
return outputs
|
return outputs
|
||||||
|
|
||||||
|
|
||||||
class RobustScannerHead(nn.Layer):
|
class RobustScannerHead(nn.Layer):
|
||||||
def __init__(self,
|
def __init__(
|
||||||
|
self,
|
||||||
out_channels, # 90 + unknown + start + padding
|
out_channels, # 90 + unknown + start + padding
|
||||||
in_channels,
|
in_channels,
|
||||||
enc_outchannles=128,
|
enc_outchannles=128,
|
||||||
|
@ -670,7 +678,7 @@ class RobustScannerHead(nn.Layer):
|
||||||
in_channels=in_channels, out_channels=enc_outchannles)
|
in_channels=in_channels, out_channels=enc_outchannles)
|
||||||
|
|
||||||
# decoder module
|
# decoder module
|
||||||
self.decoder =RobustScannerDecoder(
|
self.decoder = RobustScannerDecoder(
|
||||||
num_classes=out_channels,
|
num_classes=out_channels,
|
||||||
dim_input=in_channels,
|
dim_input=in_channels,
|
||||||
dim_model=enc_outchannles,
|
dim_model=enc_outchannles,
|
||||||
|
@ -697,8 +705,8 @@ class RobustScannerHead(nn.Layer):
|
||||||
if self.training:
|
if self.training:
|
||||||
label = targets[0] # label
|
label = targets[0] # label
|
||||||
label = paddle.to_tensor(label, dtype='int64')
|
label = paddle.to_tensor(label, dtype='int64')
|
||||||
final_out = self.decoder(
|
final_out = self.decoder(inputs, out_enc, label, valid_ratios,
|
||||||
inputs, out_enc, label, valid_ratios, word_positions)
|
word_positions)
|
||||||
if not self.training:
|
if not self.training:
|
||||||
final_out = self.decoder(
|
final_out = self.decoder(
|
||||||
inputs,
|
inputs,
|
||||||
|
|
|
@ -43,6 +43,10 @@ if [ $modelname == "rec_r31_sar" ] || [ $modelname == "rec_mtb_nrtr" ]; then
|
||||||
sed -i "s/gpu_list:0|0,1/gpu_list:0,1/g" $FILENAME
|
sed -i "s/gpu_list:0|0,1/gpu_list:0,1/g" $FILENAME
|
||||||
sed -i "s/Global.use_npu:True|True/Global.use_npu:True/g" $FILENAME
|
sed -i "s/Global.use_npu:True|True/Global.use_npu:True/g" $FILENAME
|
||||||
fi
|
fi
|
||||||
|
if [ $modelname == "ch_ppocr_mobile_v2_0_rec_FPGM" ]; then
|
||||||
|
sed -i '18s/$/ -o Global.use_gpu=False/' $FILENAME
|
||||||
|
sed -i '32s/$/ Global.use_gpu=False/' $FILENAME
|
||||||
|
fi
|
||||||
|
|
||||||
# replace training config file
|
# replace training config file
|
||||||
grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
|
grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
|
||||||
|
|
|
@ -43,6 +43,10 @@ if [ $modelname == "rec_r31_sar" ] || [ $modelname == "rec_mtb_nrtr" ]; then
|
||||||
sed -i "s/gpu_list:0|0,1/gpu_list:0,1/g" $FILENAME
|
sed -i "s/gpu_list:0|0,1/gpu_list:0,1/g" $FILENAME
|
||||||
sed -i "s/Global.use_xpu:True|True/Global.use_xpu:True/g" $FILENAME
|
sed -i "s/Global.use_xpu:True|True/Global.use_xpu:True/g" $FILENAME
|
||||||
fi
|
fi
|
||||||
|
if [ $modelname == "ch_ppocr_mobile_v2_0_rec_FPGM" ]; then
|
||||||
|
sed -i '18s/$/ -o Global.use_gpu=False/' $FILENAME
|
||||||
|
sed -i '32s/$/ Global.use_gpu=False/' $FILENAME
|
||||||
|
fi
|
||||||
|
|
||||||
# replace training config file
|
# replace training config file
|
||||||
grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
|
grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
|
||||||
|
|
Loading…
Reference in New Issue