2018-08-15 05:04:10 +08:00
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
|
|
|
|
import torch
|
|
|
|
import torch.nn as nn
|
|
|
|
|
|
|
|
|
2018-10-30 07:00:17 +08:00
|
|
|
class CrossEntropyLoss(nn.Module):
    r"""Cross entropy loss with label smoothing regularizer.

    Reference:
        Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.

    With label smoothing, the label :math:`y` for a class is computed by

    .. math::
        \begin{equation}
        (1 - \epsilon) \times y + \frac{\epsilon}{K},
        \end{equation}

    where :math:`K` denotes the number of classes and :math:`\epsilon` is a weight. When
    :math:`\epsilon = 0`, the loss function reduces to the normal cross entropy.

    Args:
        num_classes (int): number of classes.
        epsilon (float, optional): weight. Default is 0.1.
        use_gpu (bool, optional): kept for backward compatibility and stored on
            the module. The loss is computed on whichever device ``inputs``
            lives on, so this flag no longer affects tensor placement.
            Default is True.
        label_smooth (bool, optional): whether to apply label smoothing. Default is True.
    """

    def __init__(self, num_classes, epsilon=0.1, use_gpu=True, label_smooth=True):
        super(CrossEntropyLoss, self).__init__()
        self.num_classes = num_classes
        # Disabling label smoothing is equivalent to a zero smoothing weight.
        self.epsilon = epsilon if label_smooth else 0
        self.use_gpu = use_gpu
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        """
        Args:
            inputs (torch.Tensor): prediction matrix (before softmax) with
                shape (batch_size, num_classes).
            targets (torch.LongTensor): ground truth labels with shape (batch_size).
                Each position contains the label index.

        Returns:
            torch.Tensor: scalar loss, i.e. the smoothed cross entropy averaged
            over the batch dimension and summed over the classes.
        """
        log_probs = self.logsoftmax(inputs)

        # Build the one-hot matrix directly on the device of the predictions
        # instead of on the CPU followed by an unconditional ``.cuda()``; this
        # keeps the loss usable on CPU-only machines and avoids a host/device
        # round-trip on every call.
        index = targets.detach().unsqueeze(1).to(
            device=log_probs.device, dtype=torch.long
        )
        one_hot = torch.zeros(log_probs.size(), device=log_probs.device)
        one_hot.scatter_(1, index, 1)

        # Mix the hard labels with a uniform distribution over the classes.
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes

        # Mean over the batch (dim 0), then sum over the classes.
        return (-smoothed * log_probs).mean(0).sum()
|