import mmcv
import numpy as np
import torch
from mmdet.core import bbox2roi, build_assigner, build_sampler
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
from mmdet.models.dense_heads import (AnchorHead, CornerHead, FCOSHead,
                                      FSAFHead, GuidedAnchorHead, PAAHead,
                                      SABLRetinaHead, TransformerHead,
                                      VFNetHead, YOLACTHead, YOLACTProtonet,
                                      YOLACTSegmHead, paa_head)
from mmdet.models.dense_heads.paa_head import levels_to_images
from mmdet.models.roi_heads.bbox_heads import BBoxHead, SABLHead
from mmdet.models.roi_heads.mask_heads import FCNMaskHead, MaskIoUHead


def test_paa_head_loss():
    """Tests paa head loss when truth is empty and non-empty."""

    class mock_skm(object):

        def GaussianMixture(self, *args, **kwargs):
            return self

        def fit(self, loss):
            pass

        def predict(self, loss):
            components = np.zeros_like(loss, dtype=np.int64)
            return components.reshape(-1)

        def score_samples(self, loss):
            scores = np.random.random(len(loss))
            return scores

    # Replace the sklearn.mixture module used by PAA label assignment with the
    # mock above so the test does not depend on sklearn.
    paa_head.skm = mock_skm()

    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.1,
                neg_iou_thr=0.1,
                min_pos_iou=0,
                ignore_iof_thr=-1),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    # since Focal Loss is not supported on CPU
    self = PAAHead(
        num_classes=4,
        in_channels=1,
        train_cfg=train_cfg,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))
    feat = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    self.init_weights()
    cls_scores, bbox_preds, iou_preds = self(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,
                                gt_labels, img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = empty_gt_losses['loss_cls']
    empty_box_loss = empty_gt_losses['loss_bbox']
    empty_iou_loss = empty_gt_losses['loss_iou']
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')
    assert empty_iou_loss.item() == 0, (
        'there should be no iou loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,
                              gt_labels, img_metas, gt_bboxes_ignore)
    onegt_cls_loss = one_gt_losses['loss_cls']
    onegt_box_loss = one_gt_losses['loss_bbox']
    onegt_iou_loss = one_gt_losses['loss_iou']
    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'
    assert onegt_iou_loss.item() > 0, 'iou loss should be non-zero'

    # Test levels_to_images and bbox decoding with score voting on dummy inputs
    n, c, h, w = 10, 4, 20, 20
    mlvl_tensor = [torch.ones(n, c, h, w) for i in range(5)]
    results = levels_to_images(mlvl_tensor)
    assert len(results) == n
    assert results[0].size() == (h * w * 5, c)
    assert self.with_score_voting

    cls_scores = [torch.ones(4, 5, 5)]
    bbox_preds = [torch.ones(4, 5, 5)]
    iou_preds = [torch.ones(1, 5, 5)]
    mlvl_anchors = [torch.ones(5 * 5, 4)]
    img_shape = None
    scale_factor = [0.5, 0.5]
    cfg = mmcv.Config(
        dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.6),
            max_per_img=100))
    rescale = False
    self._get_bboxes_single(
        cls_scores,
        bbox_preds,
        iou_preds,
        mlvl_anchors,
        img_shape,
        scale_factor,
        cfg,
        rescale=rescale)


def test_fcos_head_loss():
    """Tests fcos head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0,
                ignore_iof_thr=-1),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    # since Focal Loss is not supported on CPU
    self = FCOSHead(
        num_classes=4,
        in_channels=1,
        train_cfg=train_cfg,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))
    feat = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    cls_scores, bbox_preds, centerness = self.forward(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, centerness, gt_bboxes,
                                gt_labels, img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = empty_gt_losses['loss_cls']
    empty_box_loss = empty_gt_losses['loss_bbox']
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, centerness, gt_bboxes,
                              gt_labels, img_metas, gt_bboxes_ignore)
    onegt_cls_loss = one_gt_losses['loss_cls']
    onegt_box_loss = one_gt_losses['loss_bbox']
    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_vfnet_head_loss():
    """Tests vfnet head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    # since Focal Loss is not supported on CPU
    self = VFNetHead(
        num_classes=4,
        in_channels=1,
        train_cfg=train_cfg,
        loss_cls=dict(type='VarifocalLoss', use_sigmoid=True, loss_weight=1.0))
    if torch.cuda.is_available():
        self.cuda()
        feat = [
            torch.rand(1, 1, s // feat_size, s // feat_size).cuda()
            for feat_size in [4, 8, 16, 32, 64]
        ]
        cls_scores, bbox_preds, bbox_preds_refine = self.forward(feat)

        # Test that empty ground truth encourages the network to predict
        # background
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]
        gt_bboxes_ignore = None
        empty_gt_losses = self.loss(cls_scores, bbox_preds, bbox_preds_refine,
                                    gt_bboxes, gt_labels, img_metas,
                                    gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = empty_gt_losses['loss_cls']
        empty_box_loss = empty_gt_losses['loss_bbox']
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_loss.item() == 0, (
            'there should be no box loss when there are no true boxes')

        # When truth is non-empty then both cls and box loss should be nonzero
        # for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = self.loss(cls_scores, bbox_preds, bbox_preds_refine,
                                  gt_bboxes, gt_labels, img_metas,
                                  gt_bboxes_ignore)
        onegt_cls_loss = one_gt_losses['loss_cls']
        onegt_box_loss = one_gt_losses['loss_bbox']
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_anchor_head_loss():
    """Tests anchor head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False))
    self = AnchorHead(num_classes=4, in_channels=1, train_cfg=cfg)

    # The anchor head expects multiple levels of features per image
    feat = [
        torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))
        for i in range(len(self.anchor_generator.strides))
    ]
    cls_scores, bbox_preds = self.forward(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = sum(empty_gt_losses['loss_cls'])
    empty_box_loss = sum(empty_gt_losses['loss_bbox'])
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                              img_metas, gt_bboxes_ignore)
    onegt_cls_loss = sum(one_gt_losses['loss_cls'])
    onegt_box_loss = sum(one_gt_losses['loss_bbox'])
    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_fsaf_head_loss():
    """Tests fsaf head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    cfg = dict(
        reg_decoded_bbox=True,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=1,
            scales_per_octave=1,
            ratios=[1.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(type='TBLRBBoxCoder', normalizer=4.0),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0,
            reduction='none'),
        loss_bbox=dict(
            type='IoULoss', eps=1e-6, loss_weight=1.0, reduction='none'))
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='CenterRegionAssigner',
                pos_scale=0.2,
                neg_scale=0.2,
                min_pos_iof=0.01),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    head = FSAFHead(num_classes=4, in_channels=1, train_cfg=train_cfg, **cfg)
    if torch.cuda.is_available():
        head.cuda()
        # FSAF head expects multiple levels of features per image
        feat = [
            torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2))).cuda()
            for i in range(len(head.anchor_generator.strides))
        ]
        cls_scores, bbox_preds = head.forward(feat)
        gt_bboxes_ignore = None

        # When truth is non-empty then both cls and box loss should be nonzero
        # for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                  img_metas, gt_bboxes_ignore)
        onegt_cls_loss = sum(one_gt_losses['loss_cls'])
        onegt_box_loss = sum(one_gt_losses['loss_bbox'])
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'

        # Test that empty ground truth encourages the network to predict bkg
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]
        empty_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes,
                                    gt_labels, img_metas, gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = sum(empty_gt_losses['loss_cls'])
        empty_box_loss = sum(empty_gt_losses['loss_bbox'])
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_loss.item() == 0, (
            'there should be no box loss when there are no true boxes')


def test_ga_anchor_head_loss():
    """Tests guided anchor head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            ga_assigner=dict(
                type='ApproxMaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            ga_sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            center_ratio=0.2,
            ignore_ratio=0.5,
            pos_weight=-1,
            debug=False))
    head = GuidedAnchorHead(num_classes=4, in_channels=4, train_cfg=cfg)

    # The guided anchor head expects multiple levels of features per image
    if torch.cuda.is_available():
        head.cuda()
        feat = [
            torch.rand(1, 4, s // (2**(i + 2)), s // (2**(i + 2))).cuda()
            for i in range(len(head.approx_anchor_generator.base_anchors))
        ]
        cls_scores, bbox_preds, shape_preds, loc_preds = head.forward(feat)

        # Test that empty ground truth encourages the network to predict
        # background
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]
        gt_bboxes_ignore = None
        empty_gt_losses = head.loss(cls_scores, bbox_preds, shape_preds,
                                    loc_preds, gt_bboxes, gt_labels, img_metas,
                                    gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = sum(empty_gt_losses['loss_cls'])
        empty_box_loss = sum(empty_gt_losses['loss_bbox'])
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_loss.item() == 0, (
            'there should be no box loss when there are no true boxes')

        # When truth is non-empty then both cls and box loss should be nonzero
        # for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = head.loss(cls_scores, bbox_preds, shape_preds,
                                  loc_preds, gt_bboxes, gt_labels, img_metas,
                                  gt_bboxes_ignore)
        onegt_cls_loss = sum(one_gt_losses['loss_cls'])
        onegt_box_loss = sum(one_gt_losses['loss_bbox'])
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'


def test_bbox_head_loss():
    """Tests bbox head loss when truth is empty and non-empty."""
    self = BBoxHead(in_channels=8, roi_feat_size=3)

    # Dummy proposals
    proposal_list = [
        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
    ]
    target_cfg = mmcv.Config(dict(pos_weight=1))

    # Test bbox loss when truth is empty
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)
    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    rois = bbox2roi([res.bboxes for res in sampling_results])
    dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
    assert losses.get('loss_bbox', 0) == 0, 'empty gt loss should be zero'

    # Test bbox loss when truth is non-empty
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)
    rois = bbox2roi([res.bboxes for res in sampling_results])
    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
    assert losses.get('loss_bbox', 0) > 0, 'box-loss should be non-zero'


def test_sabl_bbox_head_loss():
    """Tests sabl bbox head loss when truth is empty and non-empty."""
    self = SABLHead(
        num_classes=4,
        cls_in_channels=3,
        reg_in_channels=3,
        cls_out_channels=3,
        reg_offset_out_channels=3,
        reg_cls_out_channels=3,
        roi_feat_size=7)

    # Dummy proposals
    proposal_list = [
        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
    ]
    target_cfg = mmcv.Config(dict(pos_weight=1))

    # Test bbox loss when truth is empty
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)
    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    rois = bbox2roi([res.bboxes for res in sampling_results])
    dummy_feats = torch.rand(num_sampled, 3, 7, 7)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
    assert losses.get('loss_bbox_cls', 0) == 0, (
        'empty gt bbox-cls-loss should be zero')
    assert losses.get('loss_bbox_reg', 0) == 0, (
        'empty gt bbox-reg-loss should be zero')

    # Test bbox loss when truth is non-empty
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)
    rois = bbox2roi([res.bboxes for res in sampling_results])
    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,
                                    target_cfg)
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    dummy_feats = torch.rand(num_sampled, 3, 7, 7)
    cls_scores, bbox_preds = self.forward(dummy_feats)

    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,
                       bbox_targets, bbox_weights)
    assert losses.get('loss_bbox_cls', 0) > 0, (
        'bbox-cls-loss should be non-zero')
    assert losses.get('loss_bbox_reg', 0) > 0, (
        'bbox-reg-loss should be non-zero')


def test_sabl_retina_head_loss():
    """Tests sabl retina head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='ApproxMaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0.0,
                ignore_iof_thr=-1),
            allowed_border=-1,
            pos_weight=-1,
            debug=False))
    head = SABLRetinaHead(
        num_classes=4,
        in_channels=3,
        feat_channels=10,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        train_cfg=cfg)
    if torch.cuda.is_available():
        head.cuda()
        # The head expects multiple levels of features per image
        feat = [
            torch.rand(1, 3, s // (2**(i + 2)), s // (2**(i + 2))).cuda()
            for i in range(len(head.approx_anchor_generator.base_anchors))
        ]
        cls_scores, bbox_preds = head.forward(feat)

        # Test that empty ground truth encourages the network
        # to predict background
        gt_bboxes = [torch.empty((0, 4)).cuda()]
        gt_labels = [torch.LongTensor([]).cuda()]
        gt_bboxes_ignore = None
        empty_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes,
                                    gt_labels, img_metas, gt_bboxes_ignore)
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box loss.
        empty_cls_loss = sum(empty_gt_losses['loss_cls'])
        empty_box_cls_loss = sum(empty_gt_losses['loss_bbox_cls'])
        empty_box_reg_loss = sum(empty_gt_losses['loss_bbox_reg'])
        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert empty_box_cls_loss.item() == 0, (
            'there should be no box cls loss when there are no true boxes')
        assert empty_box_reg_loss.item() == 0, (
            'there should be no box reg loss when there are no true boxes')

        # When truth is non-empty then both cls and box loss should
        # be nonzero for random inputs
        gt_bboxes = [
            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),
        ]
        gt_labels = [torch.LongTensor([2]).cuda()]
        one_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                  img_metas, gt_bboxes_ignore)
        onegt_cls_loss = sum(one_gt_losses['loss_cls'])
        onegt_box_cls_loss = sum(one_gt_losses['loss_bbox_cls'])
        onegt_box_reg_loss = sum(one_gt_losses['loss_bbox_reg'])
        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
        assert onegt_box_cls_loss.item() > 0, 'box cls loss should be non-zero'
        assert onegt_box_reg_loss.item() > 0, 'box reg loss should be non-zero'


def test_refine_boxes():
    """Mirrors the doctest in
    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` but checks for
    multiple values of n_roi / n_img."""
    self = BBoxHead(reg_class_agnostic=True)

    test_settings = [
        # Corner case: less rois than images
        {
            'n_roi': 2,
            'n_img': 4,
            'rng': 34285940
        },
        # Corner case: no images
        {
            'n_roi': 0,
            'n_img': 0,
            'rng': 52925222
        },
        # Corner cases: few images / rois
        {
            'n_roi': 1,
            'n_img': 1,
            'rng': 1200281
        },
        {
            'n_roi': 2,
            'n_img': 1,
            'rng': 1200282
        },
        {
            'n_roi': 2,
            'n_img': 2,
            'rng': 1200283
        },
        {
            'n_roi': 1,
            'n_img': 2,
            'rng': 1200284
        },
        # Corner case: no rois few images
        {
            'n_roi': 0,
            'n_img': 1,
            'rng': 23955860
        },
        {
            'n_roi': 0,
            'n_img': 2,
            'rng': 25830516
        },
        # Corner case: no rois many images
        {
            'n_roi': 0,
            'n_img': 10,
            'rng': 671346
        },
        {
            'n_roi': 0,
            'n_img': 20,
            'rng': 699807
        },
        # Corner case: similar num rois and images
        {
            'n_roi': 20,
            'n_img': 20,
            'rng': 1200238
        },
        {
            'n_roi': 10,
            'n_img': 20,
            'rng': 1200238
        },
        {
            'n_roi': 5,
            'n_img': 5,
            'rng': 1200238
        },
        # ----------------------------------
        # Common case: more rois than images
        {
            'n_roi': 100,
            'n_img': 1,
            'rng': 337156
        },
        {
            'n_roi': 150,
            'n_img': 2,
            'rng': 275898
        },
        {
            'n_roi': 500,
            'n_img': 5,
            'rng': 4903221
        },
    ]

    for demokw in test_settings:
        try:
            n_roi = demokw['n_roi']
            n_img = demokw['n_img']
            rng = demokw['rng']
            print(f'Test refine_boxes case: {demokw!r}')
            tup = _demodata_refine_boxes(n_roi, n_img, rng=rng)
            rois, labels, bbox_preds, pos_is_gts, img_metas = tup
            bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,
                                             pos_is_gts, img_metas)
            assert len(bboxes_list) == n_img
            assert sum(map(len, bboxes_list)) <= n_roi
            assert all(b.shape[1] == 4 for b in bboxes_list)
        except Exception:
            print(f'Test failed with demokw={demokw!r}')
            raise


def _demodata_refine_boxes(n_roi, n_img, rng=0):
    """Create random test data for the
    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` method."""
    import numpy as np
    from mmdet.core.bbox.demodata import random_boxes
    from mmdet.core.bbox.demodata import ensure_rng
    try:
        import kwarray
    except ImportError:
        import pytest
        pytest.skip('kwarray is required for this test')
    scale = 512
    rng = ensure_rng(rng)
    img_metas = [{'img_shape': (scale, scale)} for _ in range(n_img)]
    # Create rois in the expected format
    roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)
    if n_img == 0:
        assert n_roi == 0, 'cannot have any rois if there are no images'
        img_ids = torch.empty((0, ), dtype=torch.long)
        roi_boxes = torch.empty((0, 4), dtype=torch.float32)
    else:
        img_ids = rng.randint(0, n_img, (n_roi, ))
        img_ids = torch.from_numpy(img_ids)
    rois = torch.cat([img_ids[:, None].float(), roi_boxes], dim=1)
    # Create other args
    labels = rng.randint(0, 2, (n_roi, ))
    labels = torch.from_numpy(labels).long()
    bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)
    # For each image, pretend random positive boxes are gts
    is_label_pos = (labels.numpy() > 0).astype(int)
    lbl_per_img = kwarray.group_items(is_label_pos, img_ids.numpy())
    pos_per_img = [sum(lbl_per_img.get(gid, [])) for gid in range(n_img)]
    # randomly generate with numpy then sort with torch
    _pos_is_gts = [
        rng.randint(0, 2, (npos, )).astype(np.uint8) for npos in pos_per_img
    ]
    pos_is_gts = [
        torch.from_numpy(p).sort(descending=True)[0] for p in _pos_is_gts
    ]
    return rois, labels, bbox_preds, pos_is_gts, img_metas


def test_mask_head_loss():
    """Test mask head loss when mask target is empty."""
    self = FCNMaskHead(
        num_convs=1,
        roi_feat_size=6,
        in_channels=8,
        conv_out_channels=8,
        num_classes=8)

    # Dummy proposals
    proposal_list = [
        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
    ]
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
                                            gt_labels)

    # create dummy mask
    import numpy as np
    from mmdet.core import BitmapMasks
    dummy_mask = np.random.randint(0, 2, (1, 160, 240), dtype=np.uint8)
    gt_masks = [BitmapMasks(dummy_mask, 160, 240)]

    # create dummy train_cfg
    train_cfg = mmcv.Config(dict(mask_size=12, mask_thr_binary=0.5))

    # Create dummy features "extracted" for each sampled bbox
    num_sampled = sum(len(res.bboxes) for res in sampling_results)
    dummy_feats = torch.rand(num_sampled, 8, 6, 6)

    mask_pred = self.forward(dummy_feats)
    mask_targets = self.get_targets(sampling_results, gt_masks, train_cfg)
    pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])
    loss_mask = self.loss(mask_pred, mask_targets, pos_labels)
    onegt_mask_loss = sum(loss_mask['loss_mask'])
    assert onegt_mask_loss.item() > 0, 'mask loss should be non-zero'

    # test mask_iou_head
    mask_iou_head = MaskIoUHead(
        num_convs=1,
        num_fcs=1,
        roi_feat_size=6,
        in_channels=8,
        conv_out_channels=8,
        fc_out_channels=8,
        num_classes=8)

    pos_mask_pred = mask_pred[range(mask_pred.size(0)), pos_labels]
    mask_iou_pred = mask_iou_head(dummy_feats, pos_mask_pred)
    pos_mask_iou_pred = mask_iou_pred[range(mask_iou_pred.size(0)), pos_labels]

    mask_iou_targets = mask_iou_head.get_targets(sampling_results, gt_masks,
                                                 pos_mask_pred, mask_targets,
                                                 train_cfg)
    loss_mask_iou = mask_iou_head.loss(pos_mask_iou_pred, mask_iou_targets)
    onegt_mask_iou_loss = loss_mask_iou['loss_mask_iou'].sum()
    assert onegt_mask_iou_loss.item() >= 0


def _dummy_bbox_sampling(proposal_list, gt_bboxes, gt_labels):
    """Create sample results that can be passed to BBoxHead.get_targets."""
    num_imgs = 1
    feat = torch.rand(1, 1, 3, 3)
    assign_config = dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.5,
        ignore_iof_thr=-1)
    sampler_config = dict(
        type='RandomSampler',
        num=512,
        pos_fraction=0.25,
        neg_pos_ub=-1,
        add_gt_as_proposals=True)
    bbox_assigner = build_assigner(assign_config)
    bbox_sampler = build_sampler(sampler_config)
    gt_bboxes_ignore = [None for _ in range(num_imgs)]
    sampling_results = []
    for i in range(num_imgs):
        assign_result = bbox_assigner.assign(proposal_list[i], gt_bboxes[i],
                                             gt_bboxes_ignore[i], gt_labels[i])
        sampling_result = bbox_sampler.sample(
            assign_result,
            proposal_list[i],
            gt_bboxes[i],
            gt_labels[i],
            feats=feat)
        sampling_results.append(sampling_result)

    return sampling_results


def test_corner_head_loss():
    """Tests corner head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    self = CornerHead(num_classes=4, in_channels=1)

    # Corner head expects multiple levels of features per image
    feat = [
        torch.rand(1, 1, s // 4, s // 4) for _ in range(self.num_feat_levels)
    ]
    tl_heats, br_heats, tl_embs, br_embs, tl_offs, br_offs = self.forward(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,
                                br_offs, gt_bboxes, gt_labels, img_metas,
                                gt_bboxes_ignore)
    empty_det_loss = sum(empty_gt_losses['det_loss'])
    empty_push_loss = sum(empty_gt_losses['push_loss'])
    empty_pull_loss = sum(empty_gt_losses['pull_loss'])
    empty_off_loss = sum(empty_gt_losses['off_loss'])
    assert empty_det_loss.item() > 0, 'det loss should be non-zero'
    assert empty_push_loss.item() == 0, (
        'there should be no push loss when there are no true boxes')
    assert empty_pull_loss.item() == 0, (
        'there should be no pull loss when there are no true boxes')
    assert empty_off_loss.item() == 0, (
        'there should be no off loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,
                              br_offs, gt_bboxes, gt_labels, img_metas,
                              gt_bboxes_ignore)
    onegt_det_loss = sum(one_gt_losses['det_loss'])
    onegt_push_loss = sum(one_gt_losses['push_loss'])
    onegt_pull_loss = sum(one_gt_losses['pull_loss'])
    onegt_off_loss = sum(one_gt_losses['off_loss'])
    assert onegt_det_loss.item() > 0, 'det loss should be non-zero'
    assert onegt_push_loss.item() == 0, (
        'there should be no push loss when there is only one true box')
    assert onegt_pull_loss.item() > 0, 'pull loss should be non-zero'
    assert onegt_off_loss.item() > 0, 'off loss should be non-zero'

    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874],
                      [123.6667, 123.8757, 138.6326, 251.8874]]),
    ]
    gt_labels = [torch.LongTensor([2, 3])]

    # equalize the corners' embedding value of different objects to make the
    # push_loss larger than 0
    gt_bboxes_ind = (gt_bboxes[0] // 4).int().tolist()
    for tl_emb_feat, br_emb_feat in zip(tl_embs, br_embs):
        tl_emb_feat[:, :, gt_bboxes_ind[0][1],
                    gt_bboxes_ind[0][0]] = tl_emb_feat[:, :,
                                                       gt_bboxes_ind[1][1],
                                                       gt_bboxes_ind[1][0]]
        br_emb_feat[:, :, gt_bboxes_ind[0][3],
                    gt_bboxes_ind[0][2]] = br_emb_feat[:, :,
                                                       gt_bboxes_ind[1][3],
                                                       gt_bboxes_ind[1][2]]
    two_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,
                              br_offs, gt_bboxes, gt_labels, img_metas,
                              gt_bboxes_ignore)
    twogt_det_loss = sum(two_gt_losses['det_loss'])
    twogt_push_loss = sum(two_gt_losses['push_loss'])
    twogt_pull_loss = sum(two_gt_losses['pull_loss'])
    twogt_off_loss = sum(two_gt_losses['off_loss'])
    assert twogt_det_loss.item() > 0, 'det loss should be non-zero'
    assert twogt_push_loss.item() > 0, 'push loss should be non-zero'
    assert twogt_pull_loss.item() > 0, 'pull loss should be non-zero'
    assert twogt_off_loss.item() > 0, 'off loss should be non-zero'


def test_corner_head_encode_and_decode_heatmap():
    """Tests corner head generating and decoding the heatmap."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'border': (0, 0, 0, 0)
    }]

    gt_bboxes = [
        torch.Tensor([[10, 20, 200, 240], [40, 50, 100, 200],
                      [10, 20, 200, 240]])
    ]
    gt_labels = [torch.LongTensor([1, 1, 2])]

    self = CornerHead(num_classes=4, in_channels=1, corner_emb_channels=1)

    feat = [
        torch.rand(1, 1, s // 4, s // 4) for _ in range(self.num_feat_levels)
    ]

    targets = self.get_targets(
        gt_bboxes,
        gt_labels,
        feat[0].shape,
        img_metas[0]['pad_shape'],
        with_corner_emb=self.with_corner_emb)

    gt_tl_heatmap = targets['topleft_heatmap']
    gt_br_heatmap = targets['bottomright_heatmap']
    gt_tl_offset = targets['topleft_offset']
    gt_br_offset = targets['bottomright_offset']
    embedding = targets['corner_embedding']
    [top, left], [bottom, right] = embedding[0][0]
    gt_tl_embedding_heatmap = torch.zeros([1, 1, s // 4, s // 4])
    gt_br_embedding_heatmap = torch.zeros([1, 1, s // 4, s // 4])
    gt_tl_embedding_heatmap[0, 0, top, left] = 1
    gt_br_embedding_heatmap[0, 0, bottom, right] = 1

    batch_bboxes, batch_scores, batch_clses = self.decode_heatmap(
        tl_heat=gt_tl_heatmap,
        br_heat=gt_br_heatmap,
        tl_off=gt_tl_offset,
        br_off=gt_br_offset,
        tl_emb=gt_tl_embedding_heatmap,
        br_emb=gt_br_embedding_heatmap,
        img_meta=img_metas[0],
        k=100,
        kernel=3,
        distance_threshold=0.5)

    bboxes = batch_bboxes.view(-1, 4)
    scores = batch_scores.view(-1, 1)
    clses = batch_clses.view(-1, 1)

    idx = scores.argsort(dim=0, descending=True)
    bboxes = bboxes[idx].view(-1, 4)
    scores = scores[idx].view(-1)
    clses = clses[idx].view(-1)

    valid_bboxes = bboxes[torch.where(scores > 0.05)]
    valid_labels = clses[torch.where(scores > 0.05)]
    max_coordinate = valid_bboxes.max()
    offsets = valid_labels.to(valid_bboxes) * (max_coordinate + 1)
    gt_offsets = gt_labels[0].to(gt_bboxes[0]) * (max_coordinate + 1)

    offset_bboxes = valid_bboxes + offsets[:, None]
    offset_gtbboxes = gt_bboxes[0] + gt_offsets[:, None]

    iou_matrix = bbox_overlaps(offset_bboxes.numpy(), offset_gtbboxes.numpy())
    assert (iou_matrix == 1).sum() == 3


def test_yolact_head_loss():
    """Tests yolact head losses when truth is empty and non-empty."""
    s = 550
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0.,
                ignore_iof_thr=-1,
                gt_max_assign_all=False),
            smoothl1_beta=1.,
            allowed_border=-1,
            pos_weight=-1,
            neg_pos_ratio=3,
            debug=False,
            min_gt_box_wh=[4.0, 4.0]))
    bbox_head = YOLACTHead(
        num_classes=80,
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=3,
            scales_per_octave=1,
            base_sizes=[8, 16, 32, 64, 128],
            ratios=[0.5, 1.0, 2.0],
            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],
            centers=[(550 * 0.5 / x, 550 * 0.5 / x)
                     for x in [69, 35, 18, 9, 5]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            reduction='none',
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),
        num_head_convs=1,
        num_protos=32,
        use_ohem=True,
        train_cfg=train_cfg)
    segm_head = YOLACTSegmHead(
        in_channels=256,
        num_classes=80,
        loss_segm=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))
    mask_head = YOLACTProtonet(
        num_classes=80,
        in_channels=256,
        num_protos=32,
        max_masks_to_train=100,
        loss_mask_weight=6.125)
    feat = [
        torch.rand(1, 256, feat_size, feat_size)
        for feat_size in [69, 35, 18, 9, 5]
    ]
    cls_score, bbox_pred, coeff_pred = bbox_head.forward(feat)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_masks = [torch.empty((0, 550, 550))]
    gt_bboxes_ignore = None
    empty_gt_losses, sampling_results = bbox_head.loss(
        cls_score,
        bbox_pred,
        gt_bboxes,
        gt_labels,
        img_metas,
        gt_bboxes_ignore=gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = sum(empty_gt_losses['loss_cls'])
    empty_box_loss = sum(empty_gt_losses['loss_bbox'])
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # Test segm head and mask head
    segm_head_outs = segm_head(feat[0])
    empty_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)
    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,
                          sampling_results)
    empty_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes, img_metas,
                                     sampling_results)
    # When there is no truth, the segm and mask loss should be zero.
    empty_segm_loss = sum(empty_segm_loss['loss_segm'])
    empty_mask_loss = sum(empty_mask_loss['loss_mask'])
    assert empty_segm_loss.item() == 0, (
        'there should be no segm loss when there are no true boxes')
    assert empty_mask_loss == 0, (
        'there should be no mask loss when there are no true boxes')

    # When truth is non-empty then cls, box, mask, segm loss should be
    # nonzero for random inputs.
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    gt_masks = [(torch.rand((1, 550, 550)) > 0.5).float()]

    one_gt_losses, sampling_results = bbox_head.loss(
        cls_score,
        bbox_pred,
        gt_bboxes,
        gt_labels,
        img_metas,
        gt_bboxes_ignore=gt_bboxes_ignore)
    one_gt_cls_loss = sum(one_gt_losses['loss_cls'])
    one_gt_box_loss = sum(one_gt_losses['loss_bbox'])
    assert one_gt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert one_gt_box_loss.item() > 0, 'box loss should be non-zero'

    one_gt_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)
    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,
                          sampling_results)
    one_gt_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes,
                                      img_metas, sampling_results)
    one_gt_segm_loss = sum(one_gt_segm_loss['loss_segm'])
    one_gt_mask_loss = sum(one_gt_mask_loss['loss_mask'])
    assert one_gt_segm_loss.item() > 0, 'segm loss should be non-zero'
    assert one_gt_mask_loss.item() > 0, 'mask loss should be non-zero'


def test_transformer_head_loss():
    """Tests transformer head loss when truth is empty and non-empty."""
    s = 256
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'batch_input_shape': (s, s)
    }]
    train_cfg = dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='ClassificationCost', weight=1.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)))
    transformer_cfg = dict(
        type='Transformer',
        embed_dims=4,
        num_heads=1,
        num_encoder_layers=1,
        num_decoder_layers=1,
        feedforward_channels=1,
        dropout=0.1,
        act_cfg=dict(type='ReLU', inplace=True),
        norm_cfg=dict(type='LN'),
        num_fcs=2,
        pre_norm=False,
        return_intermediate_dec=True)
    positional_encoding_cfg = dict(
        type='SinePositionalEncoding', num_feats=2, normalize=True)
    self = TransformerHead(
        num_classes=4,
        in_channels=1,
        num_fcs=2,
        train_cfg=train_cfg,
        transformer=transformer_cfg,
        positional_encoding=positional_encoding_cfg)
    self.init_weights()
    feat = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    cls_scores, bbox_preds = self.forward(feat, img_metas)

    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_bboxes_ignore = None
    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                                img_metas, gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    for key, loss in empty_gt_losses.items():
        if 'cls' in key:
            assert loss.item() > 0, 'cls loss should be non-zero'
        elif 'bbox' in key:
            assert loss.item() == 0, (
                'there should be no box loss when there are no true boxes')
        elif 'iou' in key:
            assert loss.item() == 0, (
                'there should be no iou loss when there are no true boxes')

    # When truth is non-empty then both cls and box loss should be nonzero for
    # random inputs
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
                              img_metas, gt_bboxes_ignore)
    for loss in one_gt_losses.values():
        assert loss.item() > 0, (
            'cls loss, or box loss, or iou loss should be non-zero')

    # test forward_train
    self.forward_train(feat, img_metas, gt_bboxes, gt_labels)
    # test inference mode
    self.get_bboxes(cls_scores, bbox_preds, img_metas, rescale=True)