update resnet&pp-lcnet
parent
5d9d2395e2
commit
4465d27ab0
|
@ -94,13 +94,16 @@ class ConvBNLayer(TheseusLayer):
|
|||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=num_groups,
|
||||
weight_attr=ParamAttr(initializer=KaimingNormal(), learning_rate=lr_mult),
|
||||
weight_attr=ParamAttr(
|
||||
initializer=KaimingNormal(), learning_rate=lr_mult),
|
||||
bias_attr=False)
|
||||
|
||||
self.bn = BatchNorm2D(
|
||||
num_filters,
|
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult))
|
||||
weight_attr=ParamAttr(
|
||||
regularizer=L2Decay(0.0), learning_rate=lr_mult),
|
||||
bias_attr=ParamAttr(
|
||||
regularizer=L2Decay(0.0), learning_rate=lr_mult))
|
||||
self.hardswish = nn.Hardswish()
|
||||
|
||||
def forward(self, x):
|
||||
|
@ -128,8 +131,7 @@ class DepthwiseSeparable(TheseusLayer):
|
|||
num_groups=num_channels,
|
||||
lr_mult=lr_mult)
|
||||
if use_se:
|
||||
self.se = SEModule(num_channels,
|
||||
lr_mult=lr_mult)
|
||||
self.se = SEModule(num_channels, lr_mult=lr_mult)
|
||||
self.pw_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
filter_size=1,
|
||||
|
@ -216,8 +218,9 @@ class PPLCNet(TheseusLayer):
|
|||
assert len(self.stride_list
|
||||
) == 5, "stride_list length should be 5 but got {}".format(
|
||||
len(self.stride_list))
|
||||
|
||||
for i, stride in enumerate(stride_list[1:]):
|
||||
self.net_config["blocks{}".format(i+3)][0][3] = stride
|
||||
self.net_config["blocks{}".format(i + 3)][0][3] = stride
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
filter_size=3,
|
||||
|
@ -225,7 +228,7 @@ class PPLCNet(TheseusLayer):
|
|||
stride=stride_list[0],
|
||||
lr_mult=self.lr_mult_list[0])
|
||||
|
||||
self.blocks2 = nn.Sequential(* [
|
||||
self.blocks2 = nn.Sequential(*[
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
|
@ -233,10 +236,11 @@ class PPLCNet(TheseusLayer):
|
|||
stride=s,
|
||||
use_se=se,
|
||||
lr_mult=self.lr_mult_list[1])
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"])
|
||||
for i, (k, in_c, out_c, s, se
|
||||
) in enumerate(self.net_config["blocks2"])
|
||||
])
|
||||
|
||||
self.blocks3 = nn.Sequential(* [
|
||||
self.blocks3 = nn.Sequential(*[
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
|
@ -244,10 +248,11 @@ class PPLCNet(TheseusLayer):
|
|||
stride=s,
|
||||
use_se=se,
|
||||
lr_mult=self.lr_mult_list[2])
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"])
|
||||
for i, (k, in_c, out_c, s, se
|
||||
) in enumerate(self.net_config["blocks3"])
|
||||
])
|
||||
|
||||
self.blocks4 = nn.Sequential(* [
|
||||
self.blocks4 = nn.Sequential(*[
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
|
@ -255,10 +260,11 @@ class PPLCNet(TheseusLayer):
|
|||
stride=s,
|
||||
use_se=se,
|
||||
lr_mult=self.lr_mult_list[3])
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"])
|
||||
for i, (k, in_c, out_c, s, se
|
||||
) in enumerate(self.net_config["blocks4"])
|
||||
])
|
||||
|
||||
self.blocks5 = nn.Sequential(* [
|
||||
self.blocks5 = nn.Sequential(*[
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
|
@ -266,10 +272,11 @@ class PPLCNet(TheseusLayer):
|
|||
stride=s,
|
||||
use_se=se,
|
||||
lr_mult=self.lr_mult_list[4])
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"])
|
||||
for i, (k, in_c, out_c, s, se
|
||||
) in enumerate(self.net_config["blocks5"])
|
||||
])
|
||||
|
||||
self.blocks6 = nn.Sequential(* [
|
||||
self.blocks6 = nn.Sequential(*[
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
|
@ -277,13 +284,15 @@ class PPLCNet(TheseusLayer):
|
|||
stride=s,
|
||||
use_se=se,
|
||||
lr_mult=self.lr_mult_list[5])
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"])
|
||||
for i, (k, in_c, out_c, s, se
|
||||
) in enumerate(self.net_config["blocks6"])
|
||||
])
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
if self.use_last_conv:
|
||||
self.last_conv = Conv2D(
|
||||
in_channels=make_divisible(self.net_config["blocks6"][-1][2] * scale),
|
||||
in_channels=make_divisible(self.net_config["blocks6"][-1][2] *
|
||||
scale),
|
||||
out_channels=self.class_expand,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
|
@ -294,7 +303,9 @@ class PPLCNet(TheseusLayer):
|
|||
else:
|
||||
self.last_conv = None
|
||||
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
|
||||
self.fc = Linear(self.class_expand if self.use_last_conv else make_divisible(self.net_config["blocks6"][-1][2]), class_num)
|
||||
self.fc = Linear(
|
||||
self.class_expand if self.use_last_conv else
|
||||
make_divisible(self.net_config["blocks6"][-1][2]), class_num)
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
|
|
|
@ -20,7 +20,7 @@ import numpy as np
|
|||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, BatchNorm2D
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
from paddle.regularizer import L2Decay
|
||||
|
@ -134,9 +134,6 @@ class ConvBNLayer(TheseusLayer):
|
|||
bias_attr=False,
|
||||
data_format=data_format)
|
||||
|
||||
weight_attr = ParamAttr(learning_rate=lr_mult, trainable=True)
|
||||
bias_attr = ParamAttr(learning_rate=lr_mult, trainable=True)
|
||||
|
||||
self.bn = BatchNorm(
|
||||
num_filters,
|
||||
param_attr=ParamAttr(learning_rate=lr_mult),
|
||||
|
@ -324,11 +321,11 @@ class ResNet(TheseusLayer):
|
|||
self.stem_cfg = {
|
||||
#num_channels, num_filters, filter_size, stride
|
||||
"vb": [[input_image_channel, 64, 7, self.stride_list[0]]],
|
||||
"vd":
|
||||
[[input_image_channel, 32, 3, self.stride_list[0]], [32, 32, 3, 1], [32, 64, 3, 1]]
|
||||
"vd": [[input_image_channel, 32, 3, self.stride_list[0]],
|
||||
[32, 32, 3, 1], [32, 64, 3, 1]]
|
||||
}
|
||||
|
||||
self.stem = nn.Sequential(* [
|
||||
self.stem = nn.Sequential(*[
|
||||
ConvBNLayer(
|
||||
num_channels=in_c,
|
||||
num_filters=out_c,
|
||||
|
@ -341,7 +338,10 @@ class ResNet(TheseusLayer):
|
|||
])
|
||||
|
||||
self.max_pool = MaxPool2D(
|
||||
kernel_size=3, stride=stride_list[1], padding=1, data_format=data_format)
|
||||
kernel_size=3,
|
||||
stride=stride_list[1],
|
||||
padding=1,
|
||||
data_format=data_format)
|
||||
block_list = []
|
||||
for block_idx in range(len(self.block_depth)):
|
||||
shortcut = False
|
||||
|
@ -350,7 +350,8 @@ class ResNet(TheseusLayer):
|
|||
num_channels=self.num_channels[block_idx] if i == 0 else
|
||||
self.num_filters[block_idx] * self.channels_mult,
|
||||
num_filters=self.num_filters[block_idx],
|
||||
stride=self.stride_list[block_idx+1] if i == 0 and block_idx != 0 else 1,
|
||||
stride=self.stride_list[block_idx + 1]
|
||||
if i == 0 and block_idx != 0 else 1,
|
||||
shortcut=shortcut,
|
||||
if_first=block_idx == i == 0 if version == "vd" else True,
|
||||
lr_mult=self.lr_mult_list[block_idx + 1],
|
||||
|
|
Loading…
Reference in New Issue