delete norm_decay in resnet
parent
05ecf1d045
commit
032c45c1d3
|
@ -117,7 +117,6 @@ class ConvBNLayer(TheseusLayer):
|
||||||
is_vd_mode=False,
|
is_vd_mode=False,
|
||||||
act=None,
|
act=None,
|
||||||
lr_mult=1.0,
|
lr_mult=1.0,
|
||||||
norm_decay=0.,
|
|
||||||
data_format="NCHW"):
|
data_format="NCHW"):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.is_vd_mode = is_vd_mode
|
self.is_vd_mode = is_vd_mode
|
||||||
|
@ -135,14 +134,8 @@ class ConvBNLayer(TheseusLayer):
|
||||||
bias_attr=False,
|
bias_attr=False,
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
|
|
||||||
weight_attr = ParamAttr(
|
weight_attr = ParamAttr(learning_rate=lr_mult, trainable=True)
|
||||||
learning_rate=lr_mult,
|
bias_attr = ParamAttr(learning_rate=lr_mult, trainable=True)
|
||||||
regularizer=L2Decay(norm_decay),
|
|
||||||
trainable=True)
|
|
||||||
bias_attr = ParamAttr(
|
|
||||||
learning_rate=lr_mult,
|
|
||||||
regularizer=L2Decay(norm_decay),
|
|
||||||
trainable=True)
|
|
||||||
|
|
||||||
self.bn = BatchNorm2D(
|
self.bn = BatchNorm2D(
|
||||||
num_filters, weight_attr=weight_attr, bias_attr=bias_attr)
|
num_filters, weight_attr=weight_attr, bias_attr=bias_attr)
|
||||||
|
@ -166,7 +159,6 @@ class BottleneckBlock(TheseusLayer):
|
||||||
shortcut=True,
|
shortcut=True,
|
||||||
if_first=False,
|
if_first=False,
|
||||||
lr_mult=1.0,
|
lr_mult=1.0,
|
||||||
norm_decay=0.,
|
|
||||||
data_format="NCHW"):
|
data_format="NCHW"):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
@ -176,7 +168,6 @@ class BottleneckBlock(TheseusLayer):
|
||||||
filter_size=1,
|
filter_size=1,
|
||||||
act="relu",
|
act="relu",
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
self.conv1 = ConvBNLayer(
|
self.conv1 = ConvBNLayer(
|
||||||
num_channels=num_filters,
|
num_channels=num_filters,
|
||||||
|
@ -185,7 +176,6 @@ class BottleneckBlock(TheseusLayer):
|
||||||
stride=stride,
|
stride=stride,
|
||||||
act="relu",
|
act="relu",
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
self.conv2 = ConvBNLayer(
|
self.conv2 = ConvBNLayer(
|
||||||
num_channels=num_filters,
|
num_channels=num_filters,
|
||||||
|
@ -193,7 +183,6 @@ class BottleneckBlock(TheseusLayer):
|
||||||
filter_size=1,
|
filter_size=1,
|
||||||
act=None,
|
act=None,
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
|
|
||||||
if not shortcut:
|
if not shortcut:
|
||||||
|
@ -204,7 +193,6 @@ class BottleneckBlock(TheseusLayer):
|
||||||
stride=stride if if_first else 1,
|
stride=stride if if_first else 1,
|
||||||
is_vd_mode=False if if_first else True,
|
is_vd_mode=False if if_first else True,
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
|
|
||||||
self.relu = nn.ReLU()
|
self.relu = nn.ReLU()
|
||||||
|
@ -233,7 +221,6 @@ class BasicBlock(TheseusLayer):
|
||||||
shortcut=True,
|
shortcut=True,
|
||||||
if_first=False,
|
if_first=False,
|
||||||
lr_mult=1.0,
|
lr_mult=1.0,
|
||||||
norm_decay=0.,
|
|
||||||
data_format="NCHW"):
|
data_format="NCHW"):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
@ -245,7 +232,6 @@ class BasicBlock(TheseusLayer):
|
||||||
stride=stride,
|
stride=stride,
|
||||||
act="relu",
|
act="relu",
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
self.conv1 = ConvBNLayer(
|
self.conv1 = ConvBNLayer(
|
||||||
num_channels=num_filters,
|
num_channels=num_filters,
|
||||||
|
@ -253,7 +239,6 @@ class BasicBlock(TheseusLayer):
|
||||||
filter_size=3,
|
filter_size=3,
|
||||||
act=None,
|
act=None,
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
if not shortcut:
|
if not shortcut:
|
||||||
self.short = ConvBNLayer(
|
self.short = ConvBNLayer(
|
||||||
|
@ -263,7 +248,6 @@ class BasicBlock(TheseusLayer):
|
||||||
stride=stride if if_first else 1,
|
stride=stride if if_first else 1,
|
||||||
is_vd_mode=False if if_first else True,
|
is_vd_mode=False if if_first else True,
|
||||||
lr_mult=lr_mult,
|
lr_mult=lr_mult,
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
self.shortcut = shortcut
|
self.shortcut = shortcut
|
||||||
self.relu = nn.ReLU()
|
self.relu = nn.ReLU()
|
||||||
|
@ -300,7 +284,6 @@ class ResNet(TheseusLayer):
|
||||||
stem_act="relu",
|
stem_act="relu",
|
||||||
class_num=1000,
|
class_num=1000,
|
||||||
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
|
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
|
||||||
norm_decay=0.,
|
|
||||||
data_format="NCHW",
|
data_format="NCHW",
|
||||||
input_image_channel=3,
|
input_image_channel=3,
|
||||||
return_patterns=None,
|
return_patterns=None,
|
||||||
|
@ -340,7 +323,6 @@ class ResNet(TheseusLayer):
|
||||||
stride=s,
|
stride=s,
|
||||||
act=stem_act,
|
act=stem_act,
|
||||||
lr_mult=self.lr_mult_list[0],
|
lr_mult=self.lr_mult_list[0],
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format)
|
data_format=data_format)
|
||||||
for in_c, out_c, k, s in self.stem_cfg[version]
|
for in_c, out_c, k, s in self.stem_cfg[version]
|
||||||
])
|
])
|
||||||
|
@ -359,7 +341,6 @@ class ResNet(TheseusLayer):
|
||||||
shortcut=shortcut,
|
shortcut=shortcut,
|
||||||
if_first=block_idx == i == 0 if version == "vd" else True,
|
if_first=block_idx == i == 0 if version == "vd" else True,
|
||||||
lr_mult=self.lr_mult_list[block_idx + 1],
|
lr_mult=self.lr_mult_list[block_idx + 1],
|
||||||
norm_decay=norm_decay,
|
|
||||||
data_format=data_format))
|
data_format=data_format))
|
||||||
shortcut = True
|
shortcut = True
|
||||||
self.blocks = nn.Sequential(*block_list)
|
self.blocks = nn.Sequential(*block_list)
|
||||||
|
|
|
@ -20,7 +20,6 @@ Arch:
|
||||||
name: "ResNet50"
|
name: "ResNet50"
|
||||||
pretrained: True
|
pretrained: True
|
||||||
class_num: 26
|
class_num: 26
|
||||||
norm_decay: 0.0005
|
|
||||||
|
|
||||||
# loss function config for training/eval process
|
# loss function config for training/eval process
|
||||||
Loss:
|
Loss:
|
||||||
|
|
Loading…
Reference in New Issue