mirror of https://github.com/hero-y/BHRL
184 lines
7.0 KiB
Python
184 lines
7.0 KiB
Python
import mmcv
|
|
import torch
|
|
|
|
from mmdet.core import bbox_overlaps
|
|
|
|
|
|
@mmcv.jit(derivate=True, coderize=True)
def isr_p(cls_score,
          bbox_pred,
          bbox_targets,
          rois,
          sampling_results,
          loss_cls,
          bbox_coder,
          k=2,
          bias=0,
          num_class=80):
    """Importance-based Sample Reweighting (ISR_P), positive part.

    Positive samples are ranked by IoU between their decoded prediction and
    their decoded regression target (IoU Hierarchical Local Rank), the rank
    is mapped to an importance weight, and the classification label weights
    of the positives are rescaled so that the total positive classification
    loss stays equal to its value before re-weighting.

    Args:
        cls_score (Tensor): Predicted classification scores.
        bbox_pred (Tensor): Predicted bbox deltas. If the last dimension is
            larger than 4 it is viewed as (N, -1, 4) and indexed by label.
        bbox_targets (tuple[Tensor]): A tuple of bbox targets, they are
            labels, label_weights, bbox_targets, bbox_weights, respectively.
        rois (Tensor): Anchors (single_stage) in shape (n, 4) or RoIs
            (two_stage) in shape (n, 5).
        sampling_results (obj): Sampling results, one per image; only
            ``pos_assigned_gt_inds`` is read from each.
        loss_cls (func): Classification loss func of the head. It is called
            with ``reduction_override='none'``.
        bbox_coder (obj): BBox coder of the head; its ``decode`` is used.
        k (float): Power of the non-linear mapping.
        bias (float): Shift of the non-linear mapping.
        num_class (int): Number of classes, default: 80.

    Return:
        tuple([Tensor]): labels, imp_based_label_weights, bbox_targets,
            bbox_target_weights. Note that ``label_weights`` is updated in
            place, so the tensor passed in via ``bbox_targets`` is modified.
    """

    labels, label_weights, bbox_targets, bbox_weights = bbox_targets
    pos_label_inds = ((labels >= 0) &
                      (labels < num_class)).nonzero().reshape(-1)
    pos_labels = labels[pos_label_inds]

    # if no positive samples, return the original targets
    num_pos = float(pos_label_inds.size(0))
    if num_pos == 0:
        return labels, label_weights, bbox_targets, bbox_weights

    # Merge pos_assigned_gt_inds of every image into a single tensor.
    # The offset must be cumulative over all previous images: deriving it
    # from the already-shifted ids guarantees that GT ids of image i never
    # overlap with those of any earlier image (resetting it to the local max
    # of the previous image makes ids collide once there are >2 images).
    gts = []
    gt_offset = 0
    for sampling_result in sampling_results:
        gt_i = sampling_result.pos_assigned_gt_inds
        gts.append(gt_i + gt_offset)
        if len(gt_i) != 0:
            gt_offset = gts[-1].max() + 1
    gts = torch.cat(gts)
    assert len(gts) == num_pos

    # the weights are treated as constants, no gradient flows through them
    cls_score = cls_score.detach()
    bbox_pred = bbox_pred.detach()

    # For single stage detectors, rois here indicate anchors, in shape (N, 4)
    # For two stage detectors, rois are in shape (N, 5); the first column is
    # dropped so that only the 4 box coordinates are decoded
    if rois.size(-1) == 5:
        pos_rois = rois[pos_label_inds][:, 1:]
    else:
        pos_rois = rois[pos_label_inds]

    if bbox_pred.size(-1) > 4:
        # per-class deltas: pick the deltas of the assigned class
        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
        pos_delta_pred = bbox_pred[pos_label_inds, pos_labels].view(-1, 4)
    else:
        pos_delta_pred = bbox_pred[pos_label_inds].view(-1, 4)

    # compute iou of the predicted bbox and the corresponding GT
    pos_delta_target = bbox_targets[pos_label_inds].view(-1, 4)
    pos_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_pred)
    target_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_target)
    ious = bbox_overlaps(pos_bbox_pred, target_bbox_pred, is_aligned=True)

    # indexing with an index tensor yields a copy, so label_weights itself
    # is only touched by the explicit write-back at the end
    pos_imp_weights = label_weights[pos_label_inds]
    # Two steps to compute IoU-HLR. Samples are first sorted by IoU locally,
    # then sorted again within the same-rank group
    max_l_num = pos_labels.bincount().max()
    for label in pos_labels.unique():
        l_inds = (pos_labels == label).nonzero().view(-1)
        l_gts = gts[l_inds]
        for t in l_gts.unique():
            t_inds = l_inds[l_gts == t]
            t_ious = ious[t_inds]
            # sorting the sort indices turns them into per-sample ranks
            _, t_iou_rank_idx = t_ious.sort(descending=True)
            _, t_iou_rank = t_iou_rank_idx.sort()
            # shift IoUs so that the local rank dominates the later
            # class-wide sort and the raw IoU only orders equal ranks
            ious[t_inds] += max_l_num - t_iou_rank.float()
        l_ious = ious[l_inds]
        _, l_iou_rank_idx = l_ious.sort(descending=True)
        _, l_iou_rank = l_iou_rank_idx.sort()  # IoU-HLR
        # linearly map HLR to label weights
        pos_imp_weights[l_inds] *= (max_l_num - l_iou_rank.float()) / max_l_num

    pos_imp_weights = (bias + pos_imp_weights * (1 - bias)).pow(k)

    # normalize to make the new weighted loss value equal to the original loss
    pos_loss_cls = loss_cls(
        cls_score[pos_label_inds], pos_labels, reduction_override='none')
    if pos_loss_cls.dim() > 1:
        # element-wise loss with a trailing dim: broadcast the weights
        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds][:,
                                                                        None]
        new_pos_loss_cls = pos_loss_cls * pos_imp_weights[:, None]
    else:
        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds]
        new_pos_loss_cls = pos_loss_cls * pos_imp_weights
    pos_loss_cls_ratio = ori_pos_loss_cls.sum() / new_pos_loss_cls.sum()
    pos_imp_weights = pos_imp_weights * pos_loss_cls_ratio
    label_weights[pos_label_inds] = pos_imp_weights

    bbox_targets = labels, label_weights, bbox_targets, bbox_weights
    return bbox_targets
|
|
|
|
|
|
@mmcv.jit(derivate=True, coderize=True)
def carl_loss(cls_score,
              labels,
              bbox_pred,
              bbox_targets,
              loss_bbox,
              k=1,
              bias=0.2,
              avg_factor=None,
              sigmoid=False,
              num_class=80):
    """Classification-Aware Regression Loss (CARL).

    The regression loss of every positive sample is weighted by a function
    of its own classification score for the assigned class. The score is
    not detached, so the regression loss also back-propagates into the
    classification branch.

    Args:
        cls_score (Tensor): Predicted classification scores.
        labels (Tensor): Targets of classification.
        bbox_pred (Tensor): Predicted bbox deltas. If the last dimension is
            larger than 4 it is viewed as (N, -1, 4) and indexed by label.
        bbox_targets (Tensor): Target of bbox regression.
        loss_bbox (func): Regression loss func of the head. It is called
            with ``reduction_override='none'``.
        k (float): Power of the non-linear mapping.
        bias (float): Shift of the non-linear mapping.
        avg_factor (int): Average factor used in regression loss. Defaults
            to ``bbox_targets.size(0)`` when None.
        sigmoid (bool): Activation of the classification score. Sigmoid is
            used if True, softmax over the last dim otherwise.
        num_class (int): Number of classes, default: 80.

    Return:
        dict: CARL loss dict with the single key ``loss_carl``.
    """
    is_positive = (labels >= 0) & (labels < num_class)
    pos_inds = is_positive.nonzero().reshape(-1)

    # Without positives, return a zero that is still attached to cls_score.
    if pos_inds.numel() == 0:
        return dict(loss_carl=cls_score.sum()[None] * 0.)

    pos_labels = labels[pos_inds]

    # Score of the assigned class for each positive; gradient is kept.
    if sigmoid:
        activated = cls_score.sigmoid()
    else:
        activated = cls_score.softmax(-1)
    pos_cls_score = activated[pos_inds, pos_labels]

    weights = (bias + (1 - bias) * pos_cls_score).pow(k)

    # Rescale the weights so that they sum to the number of positives.
    num_pos = float(pos_cls_score.size(0))
    weights *= num_pos / weights.sum()

    if avg_factor is None:
        avg_factor = bbox_targets.size(0)

    # A last dim of 4 is used as-is; anything wider is treated as per-class
    # deltas from which the deltas of the assigned class are selected.
    if bbox_pred.size(-1) > 4:
        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
        pos_bbox_preds = bbox_pred[pos_inds, pos_labels]
    else:
        pos_bbox_preds = bbox_pred[pos_inds]

    unreduced_loss = loss_bbox(
        pos_bbox_preds, bbox_targets[pos_inds], reduction_override='none')
    ori_loss_reg = unreduced_loss / avg_factor

    # Broadcast the per-sample weight over the coordinate dim and reduce.
    loss_carl = (ori_loss_reg * weights[:, None]).sum()
    return dict(loss_carl=loss_carl[None])
|