Merge pull request #185 from littletomatodonkey/dyg_ls
Add label smooth support for dygraph
pull/218/head
commit
5d3fe63f6f
|
@ -13,3 +13,6 @@
|
|||
# limitations under the License.
|
||||
|
||||
from .resnet_name import *
|
||||
from .dpn import DPN68
|
||||
from .densenet import DenseNet121
|
||||
from .hrnet import HRNet_W18_C
|
|
@ -1,3 +1,21 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.fluid as fluid
|
||||
|
@ -268,26 +286,26 @@ class DenseNet(fluid.dygraph.Layer):
|
|||
return y
|
||||
|
||||
|
||||
def DenseNet121():
|
||||
model = DenseNet(layers=121)
|
||||
def DenseNet121(**args):
|
||||
model = DenseNet(layers=121, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet161():
|
||||
model = DenseNet(layers=161)
|
||||
def DenseNet161(**args):
|
||||
model = DenseNet(layers=161, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet169():
|
||||
model = DenseNet(layers=169)
|
||||
def DenseNet169(**args):
|
||||
model = DenseNet(layers=169, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet201():
|
||||
model = DenseNet(layers=201)
|
||||
def DenseNet201(**args):
|
||||
model = DenseNet(layers=201, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet264():
|
||||
model = DenseNet(layers=264)
|
||||
def DenseNet264(**args):
|
||||
model = DenseNet(layers=264, **args)
|
||||
return model
|
||||
|
|
|
@ -1,3 +1,21 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import paddle
|
||||
|
@ -386,26 +404,26 @@ class DPN(fluid.dygraph.Layer):
|
|||
return net_arg
|
||||
|
||||
|
||||
def DPN68():
|
||||
model = DPN(layers=68)
|
||||
def DPN68(**args):
|
||||
model = DPN(layers=68, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DPN92():
|
||||
model = DPN(layers=92)
|
||||
def DPN92(**args):
|
||||
model = DPN(layers=92, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DPN98():
|
||||
model = DPN(layers=98)
|
||||
def DPN98(**args):
|
||||
model = DPN(layers=98, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DPN107():
|
||||
model = DPN(layers=107)
|
||||
def DPN107(**args):
|
||||
model = DPN(layers=107, **args)
|
||||
return model
|
||||
|
||||
|
||||
def DPN131():
|
||||
model = DPN(layers=131)
|
||||
def DPN131(**args):
|
||||
model = DPN(layers=131, **args)
|
||||
return model
|
||||
|
|
|
@ -1,3 +1,21 @@
|
|||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.fluid as fluid
|
||||
|
@ -647,81 +665,81 @@ class HRNet(fluid.dygraph.Layer):
|
|||
return y
|
||||
|
||||
|
||||
def HRNet_W18_C():
|
||||
model = HRNet(width=18)
|
||||
def HRNet_W18_C(**args):
|
||||
model = HRNet(width=18, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W30_C():
|
||||
model = HRNet(width=30)
|
||||
def HRNet_W30_C(**args):
|
||||
model = HRNet(width=30, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W32_C():
|
||||
model = HRNet(width=32)
|
||||
def HRNet_W32_C(**args):
|
||||
model = HRNet(width=32, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W40_C():
|
||||
model = HRNet(width=40)
|
||||
def HRNet_W40_C(**args):
|
||||
model = HRNet(width=40, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W44_C():
|
||||
model = HRNet(width=44)
|
||||
def HRNet_W44_C(**args):
|
||||
model = HRNet(width=44, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W48_C():
|
||||
model = HRNet(width=48)
|
||||
def HRNet_W48_C(**args):
|
||||
model = HRNet(width=48, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W60_C():
|
||||
model = HRNet(width=60)
|
||||
def HRNet_W60_C(**args):
|
||||
model = HRNet(width=60, **args)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W64_C():
|
||||
model = HRNet(width=64)
|
||||
def HRNet_W64_C(**args):
|
||||
model = HRNet(width=64, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W18_C():
|
||||
model = HRNet(width=18, has_se=True)
|
||||
def SE_HRNet_W18_C(**args):
|
||||
model = HRNet(width=18, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W30_C():
|
||||
model = HRNet(width=30, has_se=True)
|
||||
def SE_HRNet_W30_C(**args):
|
||||
model = HRNet(width=30, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W32_C():
|
||||
model = HRNet(width=32, has_se=True)
|
||||
def SE_HRNet_W32_C(**args):
|
||||
model = HRNet(width=32, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W40_C():
|
||||
model = HRNet(width=40, has_se=True)
|
||||
def SE_HRNet_W40_C(**args):
|
||||
model = HRNet(width=40, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W44_C():
|
||||
model = HRNet(width=44, has_se=True)
|
||||
def SE_HRNet_W44_C(**args):
|
||||
model = HRNet(width=44, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W48_C():
|
||||
model = HRNet(width=48, has_se=True)
|
||||
def SE_HRNet_W48_C(**args):
|
||||
model = HRNet(width=48, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W60_C():
|
||||
model = HRNet(width=60, has_se=True)
|
||||
def SE_HRNet_W60_C(**args):
|
||||
model = HRNet(width=60, has_se=True, **args)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W64_C():
|
||||
model = HRNet(width=64, has_se=True)
|
||||
def SE_HRNet_W64_C(**args):
|
||||
model = HRNet(width=64, has_se=True, **args)
|
||||
return model
|
||||
|
|
132
tools/program.py
132
tools/program.py
|
@ -49,11 +49,9 @@ def create_dataloader():
|
|||
dataloader(fluid dataloader):
|
||||
"""
|
||||
trainer_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
|
||||
capacity = 64 if trainer_num <= 1 else 8
|
||||
capacity = 64 if trainer_num == 1 else 8
|
||||
dataloader = fluid.io.DataLoader.from_generator(
|
||||
capacity=capacity,
|
||||
use_double_buffer=True,
|
||||
iterable=True)
|
||||
capacity=capacity, use_double_buffer=True, iterable=True)
|
||||
|
||||
return dataloader
|
||||
|
||||
|
@ -76,8 +74,8 @@ def create_model(architecture, classes_num):
|
|||
return architectures.__dict__[name](class_dim=classes_num, **params)
|
||||
|
||||
|
||||
def create_loss(out,
|
||||
label,
|
||||
def create_loss(feeds,
|
||||
out,
|
||||
architecture,
|
||||
classes_num=1000,
|
||||
epsilon=None,
|
||||
|
@ -106,7 +104,7 @@ def create_loss(out,
|
|||
if architecture["name"] == "GoogLeNet":
|
||||
assert len(out) == 3, "GoogLeNet should have 3 outputs"
|
||||
loss = GoogLeNetLoss(class_dim=classes_num, epsilon=epsilon)
|
||||
return loss(out[0], out[1], out[2], label)
|
||||
return loss(out[0], out[1], out[2], feeds["label"])
|
||||
|
||||
if use_distillation:
|
||||
assert len(out) == 2, ("distillation output length must be 2, "
|
||||
|
@ -116,14 +114,13 @@ def create_loss(out,
|
|||
|
||||
if use_mix:
|
||||
loss = MixCELoss(class_dim=classes_num, epsilon=epsilon)
|
||||
raise NotImplementedError
|
||||
#feed_y_a = feeds['feed_y_a']
|
||||
#feed_y_b = feeds['feed_y_b']
|
||||
#feed_lam = feeds['feed_lam']
|
||||
#return loss(out, feed_y_a, feed_y_b, feed_lam)
|
||||
feed_y_a = feeds['y_a']
|
||||
feed_y_b = feeds['y_b']
|
||||
feed_lam = feeds['lam']
|
||||
return loss(out, feed_y_a, feed_y_b, feed_lam)
|
||||
else:
|
||||
loss = CELoss(class_dim=classes_num, epsilon=epsilon)
|
||||
return loss(out, label)
|
||||
return loss(out, feeds["label"])
|
||||
|
||||
|
||||
def create_metric(out,
|
||||
|
@ -166,14 +163,7 @@ def create_metric(out,
|
|||
return fetchs
|
||||
|
||||
|
||||
def create_fetchs(out,
|
||||
label,
|
||||
architecture,
|
||||
topk=5,
|
||||
classes_num=1000,
|
||||
epsilon=None,
|
||||
use_mix=False,
|
||||
use_distillation=False):
|
||||
def create_fetchs(feeds, net, config, mode="train"):
|
||||
"""
|
||||
Create fetchs as model outputs(included loss and measures),
|
||||
will call create_loss and create_metric(if not use_mix).
|
||||
|
@ -192,12 +182,21 @@ def create_fetchs(out,
|
|||
Returns:
|
||||
fetchs(dict): dict of model outputs(included loss and measures)
|
||||
"""
|
||||
architecture = config.ARCHITECTURE
|
||||
topk = config.topk
|
||||
classes_num = config.classes_num
|
||||
epsilon = config.get('ls_epsilon')
|
||||
use_mix = config.get('use_mix') and mode == 'train'
|
||||
use_distillation = config.get('use_distillation')
|
||||
|
||||
out = net(feeds["image"])
|
||||
|
||||
fetchs = OrderedDict()
|
||||
fetchs['loss'] = create_loss(out, label, architecture, classes_num, epsilon, use_mix,
|
||||
use_distillation)
|
||||
fetchs['loss'] = create_loss(feeds, out, architecture, classes_num,
|
||||
epsilon, use_mix, use_distillation)
|
||||
if not use_mix:
|
||||
metric = create_metric(out, label, architecture, topk, classes_num,
|
||||
use_distillation)
|
||||
metric = create_metric(out, feeds["label"], architecture, topk,
|
||||
classes_num, use_distillation)
|
||||
fetchs.update(metric)
|
||||
|
||||
return fetchs
|
||||
|
@ -278,36 +277,17 @@ def mixed_precision_optimizer(config, optimizer):
|
|||
return optimizer
|
||||
|
||||
|
||||
def compute(config, out, label, mode='train'):
|
||||
"""
|
||||
Build a program using a model and an optimizer
|
||||
1. create feeds
|
||||
2. create a dataloader
|
||||
3. create a model
|
||||
4. create fetchs
|
||||
5. create an optimizer
|
||||
|
||||
Args:
|
||||
config(dict): config
|
||||
main_prog(): main program
|
||||
startup_prog(): startup program
|
||||
is_train(bool): train or valid
|
||||
|
||||
Returns:
|
||||
dataloader(): a bridge between the model and the data
|
||||
fetchs(dict): dict of model outputs(included loss and measures)
|
||||
"""
|
||||
fetchs = create_fetchs(
|
||||
out,
|
||||
label,
|
||||
config.ARCHITECTURE,
|
||||
config.topk,
|
||||
config.classes_num,
|
||||
epsilon=config.get('ls_epsilon'),
|
||||
use_mix=config.get('use_mix') and mode == 'train',
|
||||
use_distillation=config.get('use_distillation'))
|
||||
|
||||
return fetchs
|
||||
def create_feeds(batch, use_mix):
|
||||
image = to_variable(batch[0].numpy().astype("float32"))
|
||||
if use_mix:
|
||||
y_a = to_variable(batch[1].numpy().astype("int64").reshape(-1, 1))
|
||||
y_b = to_variable(batch[2].numpy().astype("int64").reshape(-1, 1))
|
||||
lam = to_variable(batch[3].numpy().astype("float32").reshape(-1, 1))
|
||||
feeds = {"image": image, "y_a": y_a, "y_b": y_b, "lam": lam}
|
||||
else:
|
||||
label = to_variable(batch[1].numpy().astype('int64').reshape(-1, 1))
|
||||
feeds = {"image": image, "label": label}
|
||||
return feeds
|
||||
|
||||
|
||||
def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
|
||||
|
@ -324,19 +304,30 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
|
|||
|
||||
Returns:
|
||||
"""
|
||||
topk_name = 'top{}'.format(config.topk)
|
||||
metric_list = OrderedDict([
|
||||
("loss", AverageMeter('loss', '7.4f')),
|
||||
("top1", AverageMeter('top1', '.4f')),
|
||||
(topk_name, AverageMeter(topk_name, '.4f')),
|
||||
("lr", AverageMeter('lr', 'f', need_avg=False)),
|
||||
("batch_time", AverageMeter('elapse', '.3f')),
|
||||
])
|
||||
use_mix = config.get("use_mix", False) and mode == "train"
|
||||
if use_mix:
|
||||
metric_list = OrderedDict([
|
||||
("loss", AverageMeter('loss', '7.4f')),
|
||||
("lr", AverageMeter(
|
||||
'lr', 'f', need_avg=False)),
|
||||
("batch_time", AverageMeter('elapse', '.3f')),
|
||||
])
|
||||
else:
|
||||
topk_name = 'top{}'.format(config.topk)
|
||||
metric_list = OrderedDict([
|
||||
("loss", AverageMeter('loss', '7.4f')),
|
||||
("top1", AverageMeter('top1', '.4f')),
|
||||
(topk_name, AverageMeter(topk_name, '.4f')),
|
||||
("lr", AverageMeter(
|
||||
'lr', 'f', need_avg=False)),
|
||||
("batch_time", AverageMeter('elapse', '.3f')),
|
||||
])
|
||||
|
||||
tic = time.time()
|
||||
for idx, (img, label) in enumerate(dataloader()):
|
||||
label = to_variable(label.numpy().astype('int64').reshape(-1, 1))
|
||||
fetchs = compute(config, net(img), label, mode)
|
||||
for idx, batch in enumerate(dataloader()):
|
||||
batch_size = len(batch[0])
|
||||
feeds = create_feeds(batch, use_mix)
|
||||
fetchs = create_fetchs(feeds, net, config, mode)
|
||||
if mode == 'train':
|
||||
avg_loss = net.scale_loss(fetchs['loss'])
|
||||
avg_loss.backward()
|
||||
|
@ -345,10 +336,10 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
|
|||
optimizer.minimize(avg_loss)
|
||||
net.clear_gradients()
|
||||
metric_list['lr'].update(
|
||||
optimizer._global_learning_rate().numpy()[0], len(img))
|
||||
optimizer._global_learning_rate().numpy()[0], batch_size)
|
||||
|
||||
for name, fetch in fetchs.items():
|
||||
metric_list[name].update(fetch.numpy()[0], len(img))
|
||||
metric_list[name].update(fetch.numpy()[0], batch_size)
|
||||
metric_list['batch_time'].update(time.time() - tic)
|
||||
tic = time.time()
|
||||
|
||||
|
@ -365,7 +356,8 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
|
|||
logger.coloring(step_str, "PURPLE"),
|
||||
logger.coloring(fetchs_str, 'OKGREEN')))
|
||||
|
||||
end_str = ' '.join([str(m.mean) for m in metric_list.values()] + [metric_list['batch_time'].total])
|
||||
end_str = ' '.join([str(m.mean) for m in metric_list.values()] +
|
||||
[metric_list['batch_time'].total])
|
||||
if mode == 'eval':
|
||||
logger.info("END {:s} {:s}s".format(mode, end_str))
|
||||
else:
|
||||
|
@ -378,4 +370,4 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
|
|||
|
||||
# return top1_acc in order to save the best model
|
||||
if mode == 'valid':
|
||||
return metric_list['top1'].avg
|
||||
return metric_list['top1'].avg
|
||||
|
|
Loading…
Reference in New Issue