PaddleOCR/ppocr/losses/det_pse_loss.py

159 lines
5.5 KiB
Python
Raw Normal View History

2021-11-04 19:11:28 +08:00
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2021-11-04 18:23:23 +08:00
"""
This code is adapted from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
2021-09-01 12:41:37 +08:00
2021-07-27 15:33:05 +08:00
import paddle
from paddle import nn
from paddle.nn import functional as F
import numpy as np
from ppocr.utils.iou import iou
class PSELoss(nn.Layer):
    """Training loss for a PSE-style text detector.

    The total loss is ``alpha * loss_text + (1 - alpha) * loss_kernels``,
    where both terms are dice losses computed on sigmoid-activated
    predictions. The text term is restricted to pixels chosen by online
    hard example mining (OHEM); the kernel term is restricted to a text
    region mask selected by ``kernel_sample_mask``.
    """

    def __init__(
        self,
        alpha,
        ohem_ratio=3,
        kernel_sample_mask="pred",
        reduction="sum",
        eps=1e-6,
        **kwargs
    ):
        """Implement PSE Loss.

        Args:
            alpha: Weight of the text loss; the kernel loss is weighted by
                ``1 - alpha``.
            ohem_ratio: Maximum number of hard negative pixels kept per
                valid positive pixel when building the OHEM mask.
            kernel_sample_mask: ``"gt"`` masks the kernel loss with the
                ground-truth text map, ``"pred"`` with the thresholded
                predicted text map. Any other value leaves the OHEM text
                mask in place for the kernel loss.
            reduction: ``"sum"``, ``"mean"`` or ``"none"``; applied to
                every entry of the dict returned by ``forward``.
            eps: Constant added to each squared-sum term of the dice
                denominator so that it cannot be zero.
            **kwargs: Accepted and ignored.
        """
        super(PSELoss, self).__init__()
        assert reduction in ["sum", "mean", "none"]
        self.alpha = alpha
        self.ohem_ratio = ohem_ratio
        self.kernel_sample_mask = kernel_sample_mask
        self.reduction = reduction
        self.eps = eps

    def forward(self, outputs, labels):
        """Compute the PSE losses and IoU metrics for one batch.

        Args:
            outputs: Mapping whose ``"maps"`` entry holds the raw
                predictions; it is indexed as a 4-D tensor where channel 0
                is the text map and the remaining channels are kernels.
            labels: Sequence whose elements from index 1 onward are
                ``(gt_texts, gt_kernels, training_masks)``.

        Returns:
            dict with keys ``loss_text``, ``iou_text``, ``loss_kernels``,
            ``iou_kernel`` and ``loss``. Each value is reduced with
            ``paddle.sum`` / ``paddle.mean`` according to
            ``self.reduction``, or left unreduced for ``"none"``.
        """
        predicts = outputs["maps"]
        # Upsample the prediction maps by 4x before comparing to the labels.
        # NOTE(review): presumably the labels are at 4x the head's output
        # resolution - confirm against the data pipeline.
        predicts = F.interpolate(predicts, scale_factor=4)

        texts = predicts[:, 0, :, :]
        kernels = predicts[:, 1:, :, :]
        gt_texts, gt_kernels, training_masks = labels[1:]

        # text loss
        # Pass the configured ratio explicitly; relying on ohem_batch's
        # default would silently ignore the value given to __init__.
        selected_masks = self.ohem_batch(
            texts, gt_texts, training_masks, self.ohem_ratio
        )
        loss_text = self.dice_loss(texts, gt_texts, selected_masks)
        iou_text = iou(
            (texts > 0).astype("int64"), gt_texts, training_masks, reduce=False
        )
        losses = dict(loss_text=loss_text, iou_text=iou_text)

        # kernel loss
        if self.kernel_sample_mask == "gt":
            selected_masks = gt_texts * training_masks
        elif self.kernel_sample_mask == "pred":
            selected_masks = (F.sigmoid(texts) > 0.5).astype("float32") * training_masks
        # Any other setting keeps the OHEM mask computed for the text loss.

        loss_kernels = [
            self.dice_loss(
                kernels[:, i, :, :], gt_kernels[:, i, :, :], selected_masks
            )
            for i in range(kernels.shape[1])
        ]
        # Average the per-kernel dice losses for every sample.
        loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1)
        # The kernel IoU is reported for the last kernel channel only.
        iou_kernel = iou(
            (kernels[:, -1, :, :] > 0).astype("int64"),
            gt_kernels[:, -1, :, :],
            training_masks * gt_texts,
            reduce=False,
        )
        losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel))

        loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels
        losses["loss"] = loss

        if self.reduction == "sum":
            losses = {x: paddle.sum(v) for x, v in losses.items()}
        elif self.reduction == "mean":
            losses = {x: paddle.mean(v) for x, v in losses.items()}
        return losses

    def dice_loss(self, input, target, mask):
        """Return the per-sample dice loss of ``sigmoid(input)`` vs ``target``.

        All three tensors are flattened to ``[batch, -1]`` and both
        ``input`` and ``target`` are multiplied by ``mask`` before the
        overlap is computed.

        Returns:
            Tensor with one value per sample:
            ``1 - 2 * sum(p * t) / (sum(p * p) + sum(t * t) + 2 * eps)``.
        """
        input = F.sigmoid(input)

        input = input.reshape([input.shape[0], -1])
        target = target.reshape([target.shape[0], -1])
        mask = mask.reshape([mask.shape[0], -1])

        input = input * mask
        target = target * mask

        a = paddle.sum(input * target, 1)
        b = paddle.sum(input * input, 1) + self.eps
        c = paddle.sum(target * target, 1) + self.eps
        d = (2 * a) / (b + c)
        return 1 - d

    @staticmethod
    def _to_batch_mask(mask):
        """Reshape a 2-D ``[H, W]`` mask to a float32 ``[1, H, W]`` tensor."""
        return mask.reshape([1, mask.shape[0], mask.shape[1]]).astype("float32")

    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
        """Build the OHEM pixel mask for a single image.

        Args:
            score: 2-D raw text score map.
            gt_text: 2-D ground-truth text map; values ``> 0.5`` count as
                positive pixels.
            training_mask: 2-D mask; values ``<= 0.5`` mark ignored pixels.
            ohem_ratio: Maximum negatives kept per valid positive pixel.

        Returns:
            float32 tensor of shape ``[1, H, W]``. When there are no valid
            positives or no negatives to sample, this is ``training_mask``
            itself; otherwise it is 1 on non-ignored pixels that are either
            positive or score at least as high as the ``neg_num``-th
            highest-scoring negative.
        """
        # Positives that fall inside ignored regions do not count.
        pos_num = int(paddle.sum((gt_text > 0.5).astype("float32"))) - int(
            paddle.sum(
                paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5)).astype(
                    "float32"
                )
            )
        )
        if pos_num == 0:
            return self._to_batch_mask(training_mask)

        neg_num = int(paddle.sum((gt_text <= 0.5).astype("float32")))
        neg_num = int(min(pos_num * ohem_ratio, neg_num))
        if neg_num == 0:
            return self._to_batch_mask(training_mask)

        # Sorting the negated scores ascending orders negatives from the
        # highest score down; the neg_num-th entry is the cut-off score.
        neg_score = paddle.masked_select(score, gt_text <= 0.5)
        neg_score_sorted = paddle.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = paddle.logical_and(
            paddle.logical_or((score >= threshold), (gt_text > 0.5)),
            (training_mask > 0.5),
        )
        return self._to_batch_mask(selected_mask)

    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
        """Apply ``ohem_single`` to every sample and concatenate the masks.

        Args:
            scores: Batch of raw text score maps, indexed ``[i, :, :]``.
            gt_texts: Batch of ground-truth text maps, same indexing.
            training_masks: Batch of training masks, same indexing.
            ohem_ratio: Forwarded to ``ohem_single`` for each sample.

        Returns:
            float32 tensor made by concatenating the per-sample
            ``[1, H, W]`` masks along axis 0.
        """
        selected_masks = [
            self.ohem_single(
                scores[i, :, :],
                gt_texts[i, :, :],
                training_masks[i, :, :],
                ohem_ratio,
            )
            for i in range(scores.shape[0])
        ]
        return paddle.concat(selected_masks, 0).astype("float32")