Support fp16 training for ResNeXt101_32x4d (#653)
parent
8a469799af
commit
525b5e1a0e
|
@ -0,0 +1,91 @@
|
|||
mode: 'train'
|
||||
ARCHITECTURE:
|
||||
name: 'ResNeXt101_32x4d'
|
||||
|
||||
pretrained_model: ""
|
||||
model_save_dir: "./output/"
|
||||
classes_num: 1000
|
||||
total_images: 1281167
|
||||
save_interval: 1
|
||||
validate: True
|
||||
valid_interval: 1
|
||||
epochs: 120
|
||||
topk: 5
|
||||
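image_shape: [4, 224, 224]  # NOTE(review): image_shape is defined again below as [*image_channel, 224, 224]; duplicate key — confirm which definition is intended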
|
||||
|
||||
use_dali: True
|
||||
use_gpu: True
|
||||
data_format: "NCHW"
|
||||
image_channel: &image_channel 4
|
||||
image_shape: [*image_channel, 224, 224]
|
||||
|
||||
use_mix: False
|
||||
ls_epsilon: -1
|
||||
|
||||
# Mixed-precision (AMP) training settings; use_pure_fp16 is anchored here and reused below via *use_pure_fp16
|
||||
AMP:
|
||||
scale_loss: 128.0
|
||||
use_dynamic_loss_scaling: True
|
||||
use_pure_fp16: &use_pure_fp16 True
|
||||
|
||||
LEARNING_RATE:
|
||||
function: 'Piecewise'
|
||||
params:
|
||||
lr: 0.1
|
||||
decay_epochs: [30, 60, 90]
|
||||
gamma: 0.1
|
||||
|
||||
OPTIMIZER:
|
||||
function: 'Momentum'
|
||||
params:
|
||||
momentum: 0.9
|
||||
multi_precision: *use_pure_fp16
|
||||
regularizer:
|
||||
function: 'L2'
|
||||
factor: 0.000100
|
||||
|
||||
TRAIN:
|
||||
batch_size: 256
|
||||
num_workers: 4
|
||||
file_list: "./dataset/ILSVRC2012/train_list.txt"
|
||||
data_dir: "./dataset/ILSVRC2012/"
|
||||
shuffle_seed: 0
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
to_np: False
|
||||
channel_first: False
|
||||
- RandCropImage:
|
||||
size: 224
|
||||
- RandFlipImage:
|
||||
flip_code: 1
|
||||
- NormalizeImage:
|
||||
scale: 1./255.
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
output_fp16: *use_pure_fp16
|
||||
channel_num: *image_channel
|
||||
- ToCHWImage:
|
||||
|
||||
VALID:
|
||||
batch_size: 64
|
||||
num_workers: 4
|
||||
file_list: "./dataset/ILSVRC2012/val_list.txt"
|
||||
data_dir: "./dataset/ILSVRC2012/"
|
||||
shuffle_seed: 0
|
||||
transforms:
|
||||
- DecodeImage:
|
||||
to_rgb: True
|
||||
to_np: False
|
||||
channel_first: False
|
||||
- ResizeImage:
|
||||
resize_short: 256
|
||||
- CropImage:
|
||||
size: 224
|
||||
- NormalizeImage:
|
||||
scale: 1.0/255.0
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
std: [0.229, 0.224, 0.225]
|
||||
order: ''
|
||||
- ToCHWImage:
|
|
@ -41,9 +41,9 @@ class ConvBNLayer(nn.Layer):
|
|||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
name=None,
|
||||
data_format="NCHW"):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
|
@ -52,7 +52,8 @@ class ConvBNLayer(nn.Layer):
|
|||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
bias_attr=False,
|
||||
data_format=data_format)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
|
@ -63,7 +64,8 @@ class ConvBNLayer(nn.Layer):
|
|||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
moving_variance_name=bn_name + '_variance',
|
||||
data_layout=data_format)
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
|
@ -78,15 +80,16 @@ class BottleneckBlock(nn.Layer):
|
|||
stride,
|
||||
cardinality,
|
||||
shortcut=True,
|
||||
name=None):
|
||||
name=None,
|
||||
data_format="NCHW"):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
name=name + "_branch2a",
|
||||
data_format=data_format)
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
|
@ -94,13 +97,15 @@ class BottleneckBlock(nn.Layer):
|
|||
groups=cardinality,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2b")
|
||||
name=name + "_branch2b",
|
||||
data_format=data_format)
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
name=name + "_branch2c",
|
||||
data_format=data_format)
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
|
@ -109,7 +114,8 @@ class BottleneckBlock(nn.Layer):
|
|||
if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name=name + "_branch1")
|
||||
name=name + "_branch1",
|
||||
data_format=data_format)
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
|
@ -129,10 +135,12 @@ class BottleneckBlock(nn.Layer):
|
|||
|
||||
|
||||
class ResNeXt(nn.Layer):
|
||||
def __init__(self, layers=50, class_dim=1000, cardinality=32):
|
||||
def __init__(self, layers=50, class_dim=1000, cardinality=32, input_image_channel=3, data_format="NCHW"):
|
||||
super(ResNeXt, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
self.data_format = data_format
|
||||
self.input_image_channel = input_image_channel
|
||||
self.cardinality = cardinality
|
||||
supported_layers = [50, 101, 152]
|
||||
assert layers in supported_layers, \
|
||||
|
@ -153,13 +161,14 @@ class ResNeXt(nn.Layer):
|
|||
1024] if cardinality == 32 else [256, 512, 1024, 2048]
|
||||
|
||||
self.conv = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_channels=self.input_image_channel,
|
||||
num_filters=64,
|
||||
filter_size=7,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="res_conv1")
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
name="res_conv1",
|
||||
data_format=self.data_format)
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1, data_format=self.data_format)
|
||||
|
||||
self.block_list = []
|
||||
for block in range(len(depth)):
|
||||
|
@ -181,11 +190,12 @@ class ResNeXt(nn.Layer):
|
|||
stride=2 if i == 0 and block != 0 else 1,
|
||||
cardinality=self.cardinality,
|
||||
shortcut=shortcut,
|
||||
name=conv_name))
|
||||
name=conv_name,
|
||||
data_format=self.data_format))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
|
@ -199,14 +209,18 @@ class ResNeXt(nn.Layer):
|
|||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv(inputs)
|
||||
y = self.pool2d_max(y)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
|
||||
y = self.out(y)
|
||||
return y
|
||||
with paddle.static.amp.fp16_guard():
|
||||
if self.data_format == "NHWC":
|
||||
inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
|
||||
inputs.stop_gradient = True
|
||||
y = self.conv(inputs)
|
||||
y = self.pool2d_max(y)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def ResNeXt50_32x4d(**args):
|
||||
|
|
Loading…
Reference in New Issue