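"""Unit tests for mmdet dense heads and roi heads.

Most tests build a head with dummy inputs and check that the classification
loss is non-zero while box/mask losses are zero for empty ground truth and
non-zero for non-empty ground truth; helper functions create dummy sampling
results and demo data.
"""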
import mmcv
import numpy as np
import torch

from mmdet.core import bbox2roi, build_assigner, build_sampler
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
from mmdet.models.dense_heads import (AnchorHead, CornerHead, FCOSHead,
                                      FSAFHead, GuidedAnchorHead, PAAHead,
                                      SABLRetinaHead, TransformerHead,
                                      VFNetHead, YOLACTHead, YOLACTProtonet,
                                      YOLACTSegmHead, paa_head)
from mmdet.models.dense_heads.paa_head import levels_to_images
from mmdet.models.roi_heads.bbox_heads import BBoxHead, SABLHead
from mmdet.models.roi_heads.mask_heads import FCNMaskHead, MaskIoUHead


def test_paa_head_loss():
    """Tests paa head loss when truth is empty and non-empty."""

    # Stand-in for sklearn.mixture (imported as ``skm`` in paa_head) so the
    # GMM-based positive/negative sample split runs without scikit-learn.
    class mock_skm(object):

        def GaussianMixture(self, *args, **kwargs):
            return self

        def fit(self, loss):
            pass

        def predict(self, loss):
            # np.int64 instead of the removed np.long alias
            components = np.zeros_like(loss, dtype=np.int64)
            return components.reshape(-1)

        def score_samples(self, loss):
            scores = np.random.random(len(loss))
            return scores

    paa_head.skm = mock_skm()

    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.1,
                neg_iou_thr=0.1,
                min_pos_iou=0,
                ignore_iof_thr=-1),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    # since Focal Loss is not supported on CPU
    self = PAAHead(
        num_classes=4,
        in_channels=1,
        train_cfg=train_cfg,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))
    feat = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    self.init_weights()
    cls_scores, bbox_preds, iou_preds = self(feat)
    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,
                                gt_labels, img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = empty_gt_losses['loss_cls']
    empty_box_loss = empty_gt_losses['loss_bbox']
    empty_iou_loss = empty_gt_losses['loss_iou']
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')
    assert empty_iou_loss.item() == 0, (
        'there should be no iou loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,
                              gt_labels, img_metas, gt_bboxes_ignore)
    onegt_cls_loss = one_gt_losses['loss_cls']
    onegt_box_loss = one_gt_losses['loss_bbox']
    onegt_iou_loss = one_gt_losses['loss_iou']
    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'
    assert onegt_iou_loss.item() > 0, 'iou loss should be non-zero'
    n, c, h, w = 10, 4, 20, 20
    mlvl_tensor = [torch.ones(n, c, h, w) for i in range(5)]
    results = levels_to_images(mlvl_tensor)
    assert len(results) == n
    assert results[0].size() == (h * w * 5, c)
    assert self.with_score_voting
    cls_scores = [torch.ones(4, 5, 5)]
    bbox_preds = [torch.ones(4, 5, 5)]
    iou_preds = [torch.ones(1, 5, 5)]
    mlvl_anchors = [torch.ones(5 * 5, 4)]
    img_shape = None
    scale_factor = [0.5, 0.5]
    cfg = mmcv.Config(
        dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.6),
            max_per_img=100))
    rescale = False
    self._get_bboxes_single(
        cls_scores,
        bbox_preds,
        iou_preds,
        mlvl_anchors,
        img_shape,
        scale_factor,
        cfg,
        rescale=rescale)


def test_fcos_head_loss():
    """Tests fcos head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0,
                ignore_iof_thr=-1),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    # since Focal Loss is not supported on CPU
    self = FCOSHead(
        num_classes=4,
        in_channels=1,
        train_cfg=train_cfg,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))
    feat = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    cls_scores, bbox_preds, centerness = self.forward(feat)
    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, centerness, gt_bboxes,
                                gt_labels, img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = empty_gt_losses['loss_cls']
    empty_box_loss = empty_gt_losses['loss_bbox']
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, centerness, gt_bboxes,
                              gt_labels, img_metas, gt_bboxes_ignore)
    onegt_cls_loss = one_gt_losses['loss_cls']
    onegt_box_loss = one_gt_losses['loss_bbox']
    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_vfnet_head_loss():
    """Tests vfnet head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    # since Focal Loss is not supported on CPU
    self = VFNetHead(
        num_classes=4,
        in_channels=1,
        train_cfg=train_cfg,
        loss_cls=dict(type='VarifocalLoss', use_sigmoid=True, loss_weight=1.0))
    if torch.cuda.is_available():
        self.cuda()
        feat = [
            torch.rand(1, 1, s // feat_size, s // feat_size).cuda()
            for feat_size in [4, 8, 16, 32, 64]
        ]
        cls_scores, bbox_preds, bbox_preds_refine = self.forward(feat)
        # Test that empty ground truth encourages the network to predict
        # background
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]
        gt_bboxes_ignore = None
        empty_gt_losses = self.loss(cls_scores, bbox_preds, bbox_preds_refine,
                                    gt_bboxes, gt_labels, img_metas,
                                    gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = empty_gt_losses['loss_cls']
        empty_box_loss = empty_gt_losses['loss_bbox']
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_loss.item() == 0, (
            'there should be no box loss when there are no true boxes')

        # When truth is non-empty then both cls and box loss should be nonzero
        # for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = self.loss(cls_scores, bbox_preds, bbox_preds_refine,
                                  gt_bboxes, gt_labels, img_metas,
                                  gt_bboxes_ignore)
        onegt_cls_loss = one_gt_losses['loss_cls']
        onegt_box_loss = one_gt_losses['loss_bbox']
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_anchor_head_loss():
    """Tests anchor head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]

    cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False))
    self = AnchorHead(num_classes=4, in_channels=1, train_cfg=cfg)

    # Anchor head expects multiple levels of features per image
    feat = [
        torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))
        for i in range(len(self.anchor_generator.strides))
    ]
    cls_scores, bbox_preds = self.forward(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]

    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = sum(empty_gt_losses['loss_cls'])
    empty_box_loss = sum(empty_gt_losses['loss_bbox'])
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                              img_metas, gt_bboxes_ignore)
    onegt_cls_loss = sum(one_gt_losses['loss_cls'])
    onegt_box_loss = sum(one_gt_losses['loss_bbox'])
    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_fsaf_head_loss():
    """Tests fsaf head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]

    cfg = dict(
        reg_decoded_bbox=True,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=1,
            scales_per_octave=1,
            ratios=[1.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(type='TBLRBBoxCoder', normalizer=4.0),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0,
            reduction='none'),
        loss_bbox=dict(
            type='IoULoss', eps=1e-6, loss_weight=1.0, reduction='none'))

    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='CenterRegionAssigner',
                pos_scale=0.2,
                neg_scale=0.2,
                min_pos_iof=0.01),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    head = FSAFHead(num_classes=4, in_channels=1, train_cfg=train_cfg, **cfg)
    if torch.cuda.is_available():
        head.cuda()
        # FSAF head expects multiple levels of features per image
        feat = [
            torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2))).cuda()
            for i in range(len(head.anchor_generator.strides))
        ]
        cls_scores, bbox_preds = head.forward(feat)
        gt_bboxes_ignore = None

        # When truth is non-empty then both cls and box loss should be nonzero
        # for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                  img_metas, gt_bboxes_ignore)
        onegt_cls_loss = sum(one_gt_losses['loss_cls'])
        onegt_box_loss = sum(one_gt_losses['loss_bbox'])
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'

        # Test that empty ground truth encourages the network to predict bkg
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]

        empty_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes,
                                    gt_labels, img_metas, gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = sum(empty_gt_losses['loss_cls'])
        empty_box_loss = sum(empty_gt_losses['loss_bbox'])
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_loss.item() == 0, (
            'there should be no box loss when there are no true boxes')


def test_ga_anchor_head_loss():
    """Tests guided anchor head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]

    cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            ga_assigner=dict(
                type='ApproxMaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            ga_sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            center_ratio=0.2,
            ignore_ratio=0.5,
            pos_weight=-1,
            debug=False))
    head = GuidedAnchorHead(num_classes=4, in_channels=4, train_cfg=cfg)

    # Anchor head expects multiple levels of features per image
    if torch.cuda.is_available():
        head.cuda()
        feat = [
            torch.rand(1, 4, s // (2**(i + 2)), s // (2**(i + 2))).cuda()
            for i in range(len(head.approx_anchor_generator.base_anchors))
        ]
        cls_scores, bbox_preds, shape_preds, loc_preds = head.forward(feat)

        # Test that empty ground truth encourages the network to predict
        # background
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]

        gt_bboxes_ignore = None

        empty_gt_losses = head.loss(cls_scores, bbox_preds, shape_preds,
                                    loc_preds, gt_bboxes, gt_labels, img_metas,
                                    gt_bboxes_ignore)

        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = sum(empty_gt_losses['loss_cls'])
        empty_box_loss = sum(empty_gt_losses['loss_bbox'])
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_loss.item() == 0, (
            'there should be no box loss when there are no true boxes')

        # When truth is non-empty then both cls and box loss should be nonzero
        # for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = head.loss(cls_scores, bbox_preds, shape_preds,
                                  loc_preds, gt_bboxes, gt_labels, img_metas,
                                  gt_bboxes_ignore)
        onegt_cls_loss = sum(one_gt_losses['loss_cls'])
        onegt_box_loss = sum(one_gt_losses['loss_bbox'])
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_bbox_head_loss():
    """Tests bbox head loss when truth is empty and non-empty."""
    self = BBoxHead(in_channels=8, roi_feat_size=3)

    # Dummy proposals
    proposal_list = [
        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
    ]

    target_cfg = mmcv.Config(dict(pos_weight=1))

    # Test bbox loss when truth is empty
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]

    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)

    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    rois = bbox2roi([res.bboxes for res in sampling_results])
    dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
    assert losses.get('loss_bbox', 0) == 0, 'empty gt loss should be zero'

    # Test bbox loss when truth is non-empty
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]

    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)
    rois = bbox2roi([res.bboxes for res in sampling_results])

    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
    assert losses.get('loss_bbox', 0) > 0, 'box-loss should be non-zero'


def test_sabl_bbox_head_loss():
    """Tests sabl bbox head loss when truth is empty and non-empty."""
    self = SABLHead(
        num_classes=4,
        cls_in_channels=3,
        reg_in_channels=3,
        cls_out_channels=3,
        reg_offset_out_channels=3,
        reg_cls_out_channels=3,
        roi_feat_size=7)

    # Dummy proposals
    proposal_list = [
        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
    ]

    target_cfg = mmcv.Config(dict(pos_weight=1))

    # Test bbox loss when truth is empty
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]

    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)

    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    rois = bbox2roi([res.bboxes for res in sampling_results])
    dummy_feats = torch.rand(num_sampled, 3, 7, 7)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
    assert losses.get('loss_bbox_cls',
                      0) == 0, 'empty gt bbox-cls-loss should be zero'
    assert losses.get('loss_bbox_reg',
                      0) == 0, 'empty gt bbox-reg-loss should be zero'

    # Test bbox loss when truth is non-empty
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]

    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)
    rois = bbox2roi([res.bboxes for res in sampling_results])

    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    dummy_feats = torch.rand(num_sampled, 3, 7, 7)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_bbox_cls',
                      0) > 0, 'bbox-cls-loss should be non-zero'
    assert losses.get('loss_bbox_reg',
                      0) > 0, 'bbox-reg-loss should be non-zero'


def test_sabl_retina_head_loss():
    """Tests sabl retina head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]

    cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='ApproxMaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0.0,
                ignore_iof_thr=-1),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    head = SABLRetinaHead(
        num_classes=4,
        in_channels=3,
        feat_channels=10,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        train_cfg=cfg)
    if torch.cuda.is_available():
        head.cuda()
        # Anchor head expects multiple levels of features per image
        feat = [
            torch.rand(1, 3, s // (2**(i + 2)), s // (2**(i + 2))).cuda()
            for i in range(len(head.approx_anchor_generator.base_anchors))
        ]
        cls_scores, bbox_preds = head.forward(feat)

        # Test that empty ground truth encourages the network
        # to predict background
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]

        gt_bboxes_ignore = None
        empty_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes,
                                    gt_labels, img_metas, gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = sum(empty_gt_losses['loss_cls'])
        empty_box_cls_loss = sum(empty_gt_losses['loss_bbox_cls'])
        empty_box_reg_loss = sum(empty_gt_losses['loss_bbox_reg'])
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_cls_loss.item() == 0, (
            'there should be no box cls loss when there are no true boxes')
        assert empty_box_reg_loss.item() == 0, (
            'there should be no box reg loss when there are no true boxes')

        # When truth is non-empty then both cls and box loss should
        # be nonzero for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                  img_metas, gt_bboxes_ignore)
        onegt_cls_loss = sum(one_gt_losses['loss_cls'])
        onegt_box_cls_loss = sum(one_gt_losses['loss_bbox_cls'])
        onegt_box_reg_loss = sum(one_gt_losses['loss_bbox_reg'])
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_cls_loss.item() > 0, 'box cls loss should be non-zero'
        assert onegt_box_reg_loss.item() > 0, 'box reg loss should be non-zero'


def test_refine_boxes():
    """Mirrors the doctest in
    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` but checks for
    multiple values of n_roi / n_img."""
    self = BBoxHead(reg_class_agnostic=True)

    test_settings = [

        # Corner case: less rois than images
        {
            'n_roi': 2,
            'n_img': 4,
            'rng': 34285940
        },

        # Corner case: no images
        {
            'n_roi': 0,
            'n_img': 0,
            'rng': 52925222
        },

        # Corner cases: few images / rois
        {
            'n_roi': 1,
            'n_img': 1,
            'rng': 1200281
        },
        {
            'n_roi': 2,
            'n_img': 1,
            'rng': 1200282
        },
        {
            'n_roi': 2,
            'n_img': 2,
            'rng': 1200283
        },
        {
            'n_roi': 1,
            'n_img': 2,
            'rng': 1200284
        },

        # Corner case: no rois few images
        {
            'n_roi': 0,
            'n_img': 1,
            'rng': 23955860
        },
        {
            'n_roi': 0,
            'n_img': 2,
            'rng': 25830516
        },

        # Corner case: no rois many images
        {
            'n_roi': 0,
            'n_img': 10,
            'rng': 671346
        },
        {
            'n_roi': 0,
            'n_img': 20,
            'rng': 699807
        },

        # Corner case: cal_similarity num rois and images
        {
            'n_roi': 20,
            'n_img': 20,
            'rng': 1200238
        },
        {
            'n_roi': 10,
            'n_img': 20,
            'rng': 1200238
        },
        {
            'n_roi': 5,
            'n_img': 5,
            'rng': 1200238
        },

        # ----------------------------------
        # Common case: more rois than images
        {
            'n_roi': 100,
            'n_img': 1,
            'rng': 337156
        },
        {
            'n_roi': 150,
            'n_img': 2,
            'rng': 275898
        },
        {
            'n_roi': 500,
            'n_img': 5,
            'rng': 4903221
        },
    ]

    for demokw in test_settings:
        try:
            n_roi = demokw['n_roi']
            n_img = demokw['n_img']
            rng = demokw['rng']

            print(f'Test refine_boxes case: {demokw!r}')
            tup = _demodata_refine_boxes(n_roi, n_img, rng=rng)
            rois, labels, bbox_preds, pos_is_gts, img_metas = tup
            bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,
                                             pos_is_gts, img_metas)
            assert len(bboxes_list) == n_img
            assert sum(map(len, bboxes_list)) <= n_roi
            assert all(b.shape[1] == 4 for b in bboxes_list)
        except Exception:
            print(f'Test failed with demokw={demokw!r}')
            raise


def _demodata_refine_boxes(n_roi, n_img, rng=0):
    """Create random test data for the
    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` method."""
    import numpy as np
    from mmdet.core.bbox.demodata import random_boxes
    from mmdet.core.bbox.demodata import ensure_rng
    try:
        import kwarray
    except ImportError:
        import pytest
        pytest.skip('kwarray is required for this test')
    scale = 512
    rng = ensure_rng(rng)
    img_metas = [{'img_shape': (scale, scale)} for _ in range(n_img)]
    # Create rois in the expected format
    roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)
    if n_img == 0:
        assert n_roi == 0, 'cannot have any rois if there are no images'
        img_ids = torch.empty((0, ), dtype=torch.long)
        roi_boxes = torch.empty((0, 4), dtype=torch.float32)
    else:
        img_ids = rng.randint(0, n_img, (n_roi, ))
        img_ids = torch.from_numpy(img_ids)
    rois = torch.cat([img_ids[:, None].float(), roi_boxes], dim=1)
    # Create other args
    labels = rng.randint(0, 2, (n_roi, ))
    labels = torch.from_numpy(labels).long()
    bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)
    # For each image, pretend random positive boxes are gts
    # (np.int64 instead of the removed np.int alias)
    is_label_pos = (labels.numpy() > 0).astype(np.int64)
    lbl_per_img = kwarray.group_items(is_label_pos, img_ids.numpy())
    pos_per_img = [sum(lbl_per_img.get(gid, [])) for gid in range(n_img)]
    # randomly generate with numpy then sort with torch
    _pos_is_gts = [
        rng.randint(0, 2, (npos, )).astype(np.uint8) for npos in pos_per_img
    ]
    pos_is_gts = [
        torch.from_numpy(p).sort(descending=True)[0] for p in _pos_is_gts
    ]
    return rois, labels, bbox_preds, pos_is_gts, img_metas


def test_mask_head_loss():
    """Test mask head loss when mask target is empty."""
    self = FCNMaskHead(
        num_convs=1,
        roi_feat_size=6,
        in_channels=8,
        conv_out_channels=8,
        num_classes=8)

    # Dummy proposals
    proposal_list = [
        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
    ]

    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)

    # create dummy mask
    import numpy as np
    from mmdet.core import BitmapMasks
    dummy_mask = np.random.randint(0, 2, (1, 160, 240), dtype=np.uint8)
    gt_masks = [BitmapMasks(dummy_mask, 160, 240)]

    # create dummy train_cfg
    train_cfg = mmcv.Config(dict(mask_size=12, mask_thr_binary=0.5))

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    dummy_feats = torch.rand(num_sampled, 8, 6, 6)

    mask_pred = self.forward(dummy_feats)
    mask_targets = self.get_targets(sampling_results, gt_masks, train_cfg)
    pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])
    loss_mask = self.loss(mask_pred, mask_targets, pos_labels)

    onegt_mask_loss = sum(loss_mask['loss_mask'])
    assert onegt_mask_loss.item() > 0, 'mask loss should be non-zero'

    # test mask_iou_head
    mask_iou_head = MaskIoUHead(
        num_convs=1,
        num_fcs=1,
        roi_feat_size=6,
        in_channels=8,
        conv_out_channels=8,
        fc_out_channels=8,
        num_classes=8)

    pos_mask_pred = mask_pred[range(mask_pred.size(0)), pos_labels]
    mask_iou_pred = mask_iou_head(dummy_feats, pos_mask_pred)
    pos_mask_iou_pred = mask_iou_pred[range(mask_iou_pred.size(0)), pos_labels]

    mask_iou_targets = mask_iou_head.get_targets(sampling_results, gt_masks,
                                                 pos_mask_pred, mask_targets,
                                                 train_cfg)
    loss_mask_iou = mask_iou_head.loss(pos_mask_iou_pred, mask_iou_targets)
    onegt_mask_iou_loss = loss_mask_iou['loss_mask_iou'].sum()
    assert onegt_mask_iou_loss.item() >= 0


def _dummy_bbox_sampling(proposal_list, gt_bboxes, gt_labels):
    """Create sample results that can be passed to BBoxHead.get_targets."""
    num_imgs = 1
    feat = torch.rand(1, 1, 3, 3)
    assign_config = dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.5,
        ignore_iof_thr=-1)
    sampler_config = dict(
        type='RandomSampler',
        num=512,
        pos_fraction=0.25,
        neg_pos_ub=-1,
        add_gt_as_proposals=True)
    bbox_assigner = build_assigner(assign_config)
    bbox_sampler = build_sampler(sampler_config)
    gt_bboxes_ignore = [None for _ in range(num_imgs)]
    sampling_results = []
    for i in range(num_imgs):
        assign_result = bbox_assigner.assign(proposal_list[i], gt_bboxes[i],
                                             gt_bboxes_ignore[i], gt_labels[i])
        sampling_result = bbox_sampler.sample(
            assign_result,
            proposal_list[i],
            gt_bboxes[i],
            gt_labels[i],
            feats=feat)
        sampling_results.append(sampling_result)

    return sampling_results


def test_corner_head_loss():
    """Tests corner head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]

    self = CornerHead(num_classes=4, in_channels=1)

    # Corner head expects multiple levels of features per image
    feat = [
        torch.rand(1, 1, s // 4, s // 4) for _ in range(self.num_feat_levels)
    ]
    tl_heats, br_heats, tl_embs, br_embs, tl_offs, br_offs = self.forward(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]

    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,
                                br_offs, gt_bboxes, gt_labels, img_metas,
                                gt_bboxes_ignore)
    empty_det_loss = sum(empty_gt_losses['det_loss'])
    empty_push_loss = sum(empty_gt_losses['push_loss'])
    empty_pull_loss = sum(empty_gt_losses['pull_loss'])
    empty_off_loss = sum(empty_gt_losses['off_loss'])
    assert empty_det_loss.item() > 0, 'det loss should be non-zero'
    assert empty_push_loss.item() == 0, (
        'there should be no push loss when there are no true boxes')
    assert empty_pull_loss.item() == 0, (
        'there should be no pull loss when there are no true boxes')
    assert empty_off_loss.item() == 0, (
        'there should be no offset loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,
                              br_offs, gt_bboxes, gt_labels, img_metas,
                              gt_bboxes_ignore)
    onegt_det_loss = sum(one_gt_losses['det_loss'])
    onegt_push_loss = sum(one_gt_losses['push_loss'])
    onegt_pull_loss = sum(one_gt_losses['pull_loss'])
    onegt_off_loss = sum(one_gt_losses['off_loss'])
    assert onegt_det_loss.item() > 0, 'det loss should be non-zero'
    assert onegt_push_loss.item() == 0, (
        'there should be no push loss when there is only one true box')
    assert onegt_pull_loss.item() > 0, 'pull loss should be non-zero'
    assert onegt_off_loss.item() > 0, 'off loss should be non-zero'

    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874],
                      [123.6667, 123.8757, 138.6326, 251.8874]]),
    ]
    gt_labels = [torch.LongTensor([2, 3])]

    # equalize the corners' embedding value of different objects to make the
    # push_loss larger than 0
    gt_bboxes_ind = (gt_bboxes[0] // 4).int().tolist()
    for tl_emb_feat, br_emb_feat in zip(tl_embs, br_embs):
        tl_emb_feat[:, :, gt_bboxes_ind[0][1],
                    gt_bboxes_ind[0][0]] = tl_emb_feat[:, :,
                                                       gt_bboxes_ind[1][1],
                                                       gt_bboxes_ind[1][0]]
        br_emb_feat[:, :, gt_bboxes_ind[0][3],
                    gt_bboxes_ind[0][2]] = br_emb_feat[:, :,
                                                       gt_bboxes_ind[1][3],
                                                       gt_bboxes_ind[1][2]]

    two_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,
                              br_offs, gt_bboxes, gt_labels, img_metas,
                              gt_bboxes_ignore)
    twogt_det_loss = sum(two_gt_losses['det_loss'])
    twogt_push_loss = sum(two_gt_losses['push_loss'])
    twogt_pull_loss = sum(two_gt_losses['pull_loss'])
    twogt_off_loss = sum(two_gt_losses['off_loss'])
    assert twogt_det_loss.item() > 0, 'det loss should be non-zero'
    assert twogt_push_loss.item() > 0, 'push loss should be non-zero'
    assert twogt_pull_loss.item() > 0, 'pull loss should be non-zero'
    assert twogt_off_loss.item() > 0, 'off loss should be non-zero'


def test_corner_head_encode_and_decode_heatmap():
    """Tests corner head generating and decoding the heatmap."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'border': (0, 0, 0, 0)
    }]

    gt_bboxes = [
        torch.Tensor([[10, 20, 200, 240], [40, 50, 100, 200],
                      [10, 20, 200, 240]])
    ]
    gt_labels = [torch.LongTensor([1, 1, 2])]

    self = CornerHead(num_classes=4, in_channels=1, corner_emb_channels=1)

    feat = [
        torch.rand(1, 1, s // 4, s // 4) for _ in range(self.num_feat_levels)
    ]

    targets = self.get_targets(
        gt_bboxes,
        gt_labels,
        feat[0].shape,
        img_metas[0]['pad_shape'],
        with_corner_emb=self.with_corner_emb)

    gt_tl_heatmap = targets['topleft_heatmap']
    gt_br_heatmap = targets['bottomright_heatmap']
    gt_tl_offset = targets['topleft_offset']
    gt_br_offset = targets['bottomright_offset']
    embedding = targets['corner_embedding']
    [top, left], [bottom, right] = embedding[0][0]
    gt_tl_embedding_heatmap = torch.zeros([1, 1, s // 4, s // 4])
    gt_br_embedding_heatmap = torch.zeros([1, 1, s // 4, s // 4])
    gt_tl_embedding_heatmap[0, 0, top, left] = 1
    gt_br_embedding_heatmap[0, 0, bottom, right] = 1

    batch_bboxes, batch_scores, batch_clses = self.decode_heatmap(
        tl_heat=gt_tl_heatmap,
        br_heat=gt_br_heatmap,
        tl_off=gt_tl_offset,
        br_off=gt_br_offset,
        tl_emb=gt_tl_embedding_heatmap,
        br_emb=gt_br_embedding_heatmap,
        img_meta=img_metas[0],
        k=100,
        kernel=3,
        distance_threshold=0.5)

    bboxes = batch_bboxes.view(-1, 4)
    scores = batch_scores.view(-1, 1)
    clses = batch_clses.view(-1, 1)

    idx = scores.argsort(dim=0, descending=True)
    bboxes = bboxes[idx].view(-1, 4)
    scores = scores[idx].view(-1)
    clses = clses[idx].view(-1)

    valid_bboxes = bboxes[torch.where(scores > 0.05)]
    valid_labels = clses[torch.where(scores > 0.05)]
    max_coordinate = valid_bboxes.max()
    offsets = valid_labels.to(valid_bboxes) * (max_coordinate + 1)
    gt_offsets = gt_labels[0].to(gt_bboxes[0]) * (max_coordinate + 1)

    offset_bboxes = valid_bboxes + offsets[:, None]
    offset_gtbboxes = gt_bboxes[0] + gt_offsets[:, None]

    iou_matrix = bbox_overlaps(offset_bboxes.numpy(), offset_gtbboxes.numpy())
    assert (iou_matrix == 1).sum() == 3


def test_yolact_head_loss():
    """Tests yolact head losses when truth is empty and non-empty."""
    s = 550
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0.,
                ignore_iof_thr=-1,
                gt_max_assign_all=False),
            smoothl1_beta=1.,
            allowed_border=-1,
            pos_weight=-1,
            neg_pos_ratio=3,
            debug=False,
            min_gt_box_wh=[4.0, 4.0]))
    bbox_head = YOLACTHead(
        num_classes=80,
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=3,
            scales_per_octave=1,
            base_sizes=[8, 16, 32, 64, 128],
            ratios=[0.5, 1.0, 2.0],
            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],
            centers=[(550 * 0.5 / x, 550 * 0.5 / x)
                     for x in [69, 35, 18, 9, 5]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            reduction='none',
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),
        num_head_convs=1,
        num_protos=32,
        use_ohem=True,
        train_cfg=train_cfg)
    segm_head = YOLACTSegmHead(
        in_channels=256,
        num_classes=80,
        loss_segm=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))
    mask_head = YOLACTProtonet(
        num_classes=80,
        in_channels=256,
        num_protos=32,
        max_masks_to_train=100,
        loss_mask_weight=6.125)
    feat = [
        torch.rand(1, 256, feat_size, feat_size)
        for feat_size in [69, 35, 18, 9, 5]
    ]
    cls_score, bbox_pred, coeff_pred = bbox_head.forward(feat)
    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_masks = [torch.empty((0, 550, 550))]
    gt_bboxes_ignore = None
    empty_gt_losses, sampling_results = bbox_head.loss(
        cls_score,
        bbox_pred,
        gt_bboxes,
        gt_labels,
        img_metas,
        gt_bboxes_ignore=gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = sum(empty_gt_losses['loss_cls'])
    empty_box_loss = sum(empty_gt_losses['loss_bbox'])
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # Test segm head and mask head
    segm_head_outs = segm_head(feat[0])
    empty_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)
    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,
                          sampling_results)
    empty_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes, img_metas,
                                     sampling_results)
    # When there is no truth, the segm and mask loss should be zero.
    empty_segm_loss = sum(empty_segm_loss['loss_segm'])
    empty_mask_loss = sum(empty_mask_loss['loss_mask'])
    assert empty_segm_loss.item() == 0, (
        'there should be no segm loss when there are no true boxes')
    assert empty_mask_loss == 0, (
        'there should be no mask loss when there are no true boxes')

    # When truth is non-empty then cls, box, mask, segm loss should be
    # nonzero for random inputs.
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    gt_masks = [(torch.rand((1, 550, 550)) > 0.5).float()]

    one_gt_losses, sampling_results = bbox_head.loss(
        cls_score,
        bbox_pred,
        gt_bboxes,
        gt_labels,
        img_metas,
        gt_bboxes_ignore=gt_bboxes_ignore)
    one_gt_cls_loss = sum(one_gt_losses['loss_cls'])
    one_gt_box_loss = sum(one_gt_losses['loss_bbox'])
    assert one_gt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert one_gt_box_loss.item() > 0, 'box loss should be non-zero'

    one_gt_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)
    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,
                          sampling_results)
    one_gt_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes,
                                      img_metas, sampling_results)
    one_gt_segm_loss = sum(one_gt_segm_loss['loss_segm'])
    one_gt_mask_loss = sum(one_gt_mask_loss['loss_mask'])
    assert one_gt_segm_loss.item() > 0, 'segm loss should be non-zero'
    assert one_gt_mask_loss.item() > 0, 'mask loss should be non-zero'


def test_transformer_head_loss():
    """Tests transformer head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'batch_input_shape': (s, s)
    }]
    train_cfg = dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='ClassificationCost', weight=1.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)))
    transformer_cfg = dict(
        type='Transformer',
        embed_dims=4,
        num_heads=1,
        num_encoder_layers=1,
        num_decoder_layers=1,
        feedforward_channels=1,
        dropout=0.1,
        act_cfg=dict(type='ReLU', inplace=True),
        norm_cfg=dict(type='LN'),
        num_fcs=2,
        pre_norm=False,
        return_intermediate_dec=True)
    positional_encoding_cfg = dict(
        type='SinePositionalEncoding', num_feats=2, normalize=True)
    self = TransformerHead(
        num_classes=4,
        in_channels=1,
        num_fcs=2,
        train_cfg=train_cfg,
        transformer=transformer_cfg,
        positional_encoding=positional_encoding_cfg)
    self.init_weights()
    feat = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    cls_scores, bbox_preds = self.forward(feat, img_metas)
    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    for key, loss in empty_gt_losses.items():
        if 'cls' in key:
            assert loss.item() > 0, 'cls loss should be non-zero'
        elif 'bbox' in key:
            assert loss.item(
            ) == 0, 'there should be no box loss when there are no true boxes'
        elif 'iou' in key:
            assert loss.item(
            ) == 0, 'there should be no iou loss when there are no true boxes'

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                              img_metas, gt_bboxes_ignore)
    for loss in one_gt_losses.values():
        assert loss.item(
        ) > 0, 'cls loss, or box loss, or iou loss should be non-zero'

    # test forward_train
    self.forward_train(feat, img_metas, gt_bboxes, gt_labels)

    # test inference mode
    self.get_bboxes(cls_scores, bbox_preds, img_metas, rescale=True)